diff --git a/build_tools/py/BUILD b/build_tools/py/BUILD index 70891c8c..234d3fa9 100644 --- a/build_tools/py/BUILD +++ b/build_tools/py/BUILD @@ -57,30 +57,6 @@ filegroup( visibility = ["//visibility:public"], ) -# All Python 2.7 dbx_py_binarys implicitly depend on this target in -# order to 1) provide the import hook that loads our custom pycs 2) -# pick up Bazel's magic builtin __init__.py generation behavior. -py_binary( - name = "dbx_importer", - srcs = [":dbx_importer_lib"], - python_version = "PY2", -) - -py_library( - name = "dbx_importer_lib", - srcs = ["dbx_importer.py"], - srcs_version = "PY2ONLY", -) - -dbx_internal_bootstrap_py_binary( - name = "dbx_compile", - srcs = ["dbx_compile.py"], - main = "dbx_compile.py", - deps = [ - ":dbx_importer", - ], -) - dbx_internal_bootstrap_py_binary( name = "py3_compile", srcs = ["py3_compile.py"], diff --git a/build_tools/py/BUILD.in b/build_tools/py/BUILD.in index 2df3ecfc..79631db4 100644 --- a/build_tools/py/BUILD.in +++ b/build_tools/py/BUILD.in @@ -54,30 +54,6 @@ filegroup( visibility = ["//visibility:public"], ) -# All Python 2.7 dbx_py_binarys implicitly depend on this target in -# order to 1) provide the import hook that loads our custom pycs 2) -# pick up Bazel's magic builtin __init__.py generation behavior. -py_binary( - name = "dbx_importer", - srcs = [":dbx_importer_lib"], - python_version = "PY2", -) - -py_library( - name = "dbx_importer_lib", - srcs = ["dbx_importer.py"], - srcs_version = "PY2ONLY", -) - -dbx_internal_bootstrap_py_binary( - name = "dbx_compile", - srcs = ["dbx_compile.py"], - main = "dbx_compile.py", - deps = [ - ":dbx_importer", - ], -) - dbx_internal_bootstrap_py_binary( name = "py3_compile", srcs = ["py3_compile.py"], diff --git a/build_tools/py/common.bzl b/build_tools/py/common.bzl index 3799aff5..23626d81 100644 --- a/build_tools/py/common.bzl +++ b/build_tools/py/common.bzl @@ -92,7 +92,6 @@ sys.path.extend([ runfiles + {path_sep} + p for p in ({relative_piplib_python_path}) ]) -{dbx_importer} import os try: fd = os.open('/proc/self/comm', os.O_WRONLY) @@ -119,13 +118,6 @@ with open(filepath, 'rb') as f: exec(code, module.__dict__) """ -_setup_dbx_importer = """ -sys.path.insert(0, runfiles + '/../{workspace}') -from build_tools.py import dbx_importer -del sys.path[0] -dbx_importer.install() -""" - def allow_dynamic_links(ctx): if hasattr(ctx.attr, "dynamic_libraries") and ctx.attr.dynamic_libraries: return True @@ -312,7 +304,6 @@ def emit_py_binary( if internal_bootstrap: extra_pythonpath = depset(direct = [pythonpath]) - dbx_importer = "" else: # Only collect dependencies from dbx_py_library and # dbx_py_pypi* rules for non-bootstrap binaries. Those @@ -329,16 +320,7 @@ def emit_py_binary( deps = deps, data = data, ) - if py_toolchain.dbx_importer: - # The importer is only used on py2 non-bootstrap builds - # (bootstrap builds don't read dropbox's pyc). - extra_pythonpath = depset(transitive = [extra_pythonpath], direct = [pythonpath, workspace_root_to_pythonpath(py_toolchain.dbx_importer.label.workspace_root)]) - dbx_importer = _setup_dbx_importer.format( - workspace = py_toolchain.dbx_importer.label.workspace_name, - ) - else: - extra_pythonpath = depset(transitive = [extra_pythonpath], direct = [pythonpath]) - dbx_importer = "" + extra_pythonpath = depset(transitive = [extra_pythonpath], direct = [pythonpath]) piplib_contents_set = piplib_contents[build_tag] all_piplib_contents = piplib_contents_set.to_list() @@ -468,7 +450,6 @@ __path__.extend([os.path.join(os.environ['RUNFILES'], d) for d in (%s,)]) proc_title = repr(ctx.label.name[:15]), relative_user_python_path = user_python_path, relative_piplib_python_path = piplib_python_path, - dbx_importer = dbx_importer, path_sep = repr("\\" if is_windows(ctx) else "/"), ), ) @@ -529,13 +510,8 @@ __path__.extend([os.path.join(os.environ['RUNFILES'], d) for d in (%s,)]) for d in data: runfiles = runfiles.merge(d[DefaultInfo].default_runfiles) - if py_toolchain and py_toolchain.dbx_importer: - # Manually add dbx_import.py. This also implicitly picks up Bazel's magically automatic - # __init__.py insertion behavior, which is why we add it unconditionally. - runfiles = runfiles.merge(py_toolchain.dbx_importer[DefaultInfo].default_runfiles) - else: - # Add blank_py_binary to trigger Bazel's automatic __init__.py insertion behavior. - runfiles = runfiles.merge(ctx.attr._blank_py_binary[DefaultInfo].default_runfiles) + # Add blank_py_binary to trigger Bazel's automatic __init__.py insertion behavior. + runfiles = runfiles.merge(ctx.attr._blank_py_binary[DefaultInfo].default_runfiles) write_runfiles_tmpl( ctx, diff --git a/build_tools/py/dbx_compile.py b/build_tools/py/dbx_compile.py deleted file mode 100644 index dcb6d522..00000000 --- a/build_tools/py/dbx_compile.py +++ /dev/null @@ -1,21 +0,0 @@ -from __future__ import print_function - -import sys - -from build_tools.py import dbx_importer - -if __name__ == "__main__": - with open(sys.argv[1]) as fp: - allow_failures = fp.readline() == "--allow-failures\n" - items = fp.read().splitlines() - assert len(items) % 3 == 0 - n = len(items) // 3 - worked = False - for src_path, short_path, dest_path in zip( - items[:n], items[n : 2 * n], items[2 * n :] - ): - if dbx_importer.dbx_compile(src_path, dest_path, short_path, allow_failures): - worked = True - if not worked: - print("all files failed to compile") - sys.exit(1) diff --git a/build_tools/py/dbx_importer.py b/build_tools/py/dbx_importer.py deleted file mode 100644 index 612ee8c3..00000000 --- a/build_tools/py/dbx_importer.py +++ /dev/null @@ -1,230 +0,0 @@ -# coding: utf-8 - -# mypy: allow-untyped-defs, no-check-untyped-defs - -# We implement two custom behaviors on top of normal Python import: -# -# 1. We have our own pyc format that uses a hash of the source file for invalidation rather than the -# source timestamp. This makes pycs a deterministic function of the source file content. That -# property lets us build and cache pycs with Bazel. -# -# 2. We cache directory listings of directories on sys.path and arrange to search the cache instead -# of the filesystem when scanning for top-level modules. This obviates a myriad stat(2) calls when -# sys.path is long. We only cache the directory listings of directories directly on sys.path. The -# search path of packages is usually only one directory, so we would save nothing by caching. -# -# In Python 3, importlib does caching internally, and deteministic pycs may be had with PEP -# 552. Therefore, none of this file should be used on Python 3. - -from __future__ import print_function - -import ast -import errno -import hashlib -import imp -import marshal -import os -import pkgutil -import sys -import warnings - -DBX_MAGIC = b"dbx" + imp.get_magic() - -# Unfortunately, it is critical that DBXImporter subclasses pkgutil.ImpImporter. pkg_resources -# inspects of the MRO of path importer while building its global distribution working -# set. Technically, we could register our importer with pkg_resources.register_finder(), but I don't -# want to pull in pkg_resources if we don't have to—building the aforementioned global working set -# is not cheap. -class DBXImporter(pkgutil.ImpImporter): - def __init__(self, d, cache=False): - # type: (str, bool) -> None - pkgutil.ImpImporter.__init__(self, d) - self._dir_ents = os.listdir(d) if cache else None - - def find_module(self, fullname, path=None): - name = fullname.rpartition(".")[2] - if path is None: - path = [self.path] - if self._dir_ents is not None: - ext = name + ".so" - py = name + ".py" - d = os.path.join(self.path, name) - if ( - ext not in self._dir_ents - and py not in self._dir_ents - and ( - name not in self._dir_ents - or not os.path.exists(os.path.join(d, "__init__.py")) - ) - ): - # Can't possibly be in this directory. - return None - - # It would be nice to not catch any ImportError from imp.find_module. Normally, when we - # reach this spot, we're sure the module we're searching for, if it exists, must be in our - # path directory. Not catching the ImportError would be a nice assertion of - # that. Unfortunately, this property isn't true in two cases: 1) For namespace packages, we - # have to let import delegate to the next entry in the package's __path__. 2) Code that - # manually invokes import finders expects .find_module() to return None not raise an - # ImportError if a module isn't found. - try: - mod_data = imp.find_module(name, path) - except ImportError: - return None - return DBXLoader(fullname, *mod_data) - - -class DBXLoader(pkgutil.ImpLoader): - HASH_LEN = hashlib.md5().digest_size - - def load_module(self, fullname): - assert fullname == self.fullname - try: - if fullname in sys.modules: - return sys.modules[fullname] - kind = self.etc[2] - if kind == imp.PKG_DIRECTORY: - filename = os.path.join(self.filename, "__init__.py") - self.file = open(filename, "rb") - if sys.path_importer_cache.get(self.filename) is None: - sys.path_importer_cache[self.filename] = DBXImporter(self.filename) - mod = self._attempt_dbxpyc_import(fullname, filename, self.file, True) - if mod is not None: - for p in mod.__path__: - if sys.path_importer_cache.get(p) is None: - sys.path_importer_cache[p] = DBXImporter(p) - return mod - elif kind == imp.PY_SOURCE: - mod = self._attempt_dbxpyc_import( - fullname, self.filename, self.file, False - ) - if mod is not None: - return mod - mod = imp.load_module(fullname, self.file, self.filename, self.etc) - finally: - if self.file is not None: - self.file.close() - self.file = None - return mod - - def _attempt_dbxpyc_import(self, fullname, filename, fp, pkg): - co = None - try: - with open(filename + "dbxc", "rb") as dbxc_fd: - magic = dbxc_fd.read(len(DBX_MAGIC)) - if magic != DBX_MAGIC: - return None - dbxc_hash = dbxc_fd.read(self.HASH_LEN) - co = marshal.load(dbxc_fd) - except IOError as e: - if e.errno != errno.ENOENT: - raise - # Handle empty sources files without dbxpyc files. This case is quite common because - # Bazel-generated __init__.py files don't have dbxpycs. - if os.fstat(fp.fileno()).st_size != 0: - return None - else: - hasher = hashlib.md5(fp.read()) - if dbxc_hash != hasher.digest(): - # Rewind, so imp.load_module can process the file. - fp.seek(0) - return None - - try: - mod = sys.modules[fullname] = imp.new_module(fullname) - if pkg: - mod.__path__ = [os.path.dirname(filename)] - mod.__package__ = fullname - else: - mod.__package__ = fullname.rpartition(".")[0] - # We have relative paths inside the code objects for determinism. At runtime, we set - # __file__ to a full path so os.path.dirname(__file__) and the like still return the - # paths our users expect. We are also not using dropbox.runfiles to keep the - # dbx_py_binary required dependencies as small as possible. - if co is not None: - mod.__file__ = os.path.join(os.environ["RUNFILES"], co.co_filename) - # Silence the cryptography deprecation warning. This seems to be the only - # place to reliably silence it. - warnings.filterwarnings( - "ignore", - message="Python 2 is no longer supported by the Python core team", - ) - exec(co, mod.__dict__) - else: - mod.__file__ = filename - return sys.modules[fullname] - except: - if fullname in sys.modules: - del sys.modules[fullname] - raise - - -def install(): - # type: () -> None - for p in sys.path: - if os.path.isdir(p): - sys.path_importer_cache[p] = DBXImporter(p, True) - - -DOCSTRING_STRIP_EXCEPTIONS = [ - "capirca", - "statsmodel", - # scikit-image relies on docstrings to be present - # https://github.com/scikit-image/scikit-image/blob/master/skimage/measure/_regionprops.py#L977 - "scikit-image", - # seaborn relies on docstrings internally - # https://github.com/mwaskom/seaborn/blob/master/seaborn/_docstrings.py - "seaborn", - "stone", -] - - -def dbx_compile(src_path, dest_path, compiled_path, allow_failure): - # type: (str, str, str, bool) -> bool - try: - with open(src_path, "U") as f: - src = f.read() - - root = ast.parse(src) - - if not any(lib in src_path for lib in DOCSTRING_STRIP_EXCEPTIONS): - # Strip the docstrings to reduce binary size and memory usage. - for node in ast.walk(root): - # See https://github.com/python/cpython/blob/2.7/Lib/ast.py#L187 - if isinstance(node, (ast.FunctionDef, ast.ClassDef, ast.Module)): - if ( - node.body - and isinstance(node.body[0], ast.Expr) - and isinstance(node.body[0].value, ast.Str) - ): - # These libraries assume the existence of docstrings on their own methods - # and provide decorators that deprecate methods by munging their docstring. - # TODO(zbarsky): remove these exceptions after sending patches upstream - if ( - "pylons" in src_path - or "paste" in src_path - or "weberror" in src_path - or "notebook" in src_path - ): - node.body[0].value.s = " " - elif "scipy" in src_path: - # TODO(zbarsky) remove if https://github.com/scipy/scipy/pull/10848 is merged - node.body[0].value.s = "Parameters\n%s" - else: - node.body[0].value.s = "" - - h = hashlib.md5(src) - co = compile(root, compiled_path, "exec", dont_inherit=True) - - with open(dest_path, "wb") as f: - f.write(DBX_MAGIC) - f.write(h.digest()) - marshal.dump(co, f) - except Exception: - if allow_failure: - open(dest_path, "wb").close() - return False - else: - print(src_path, "->", dest_path) - raise - return True diff --git a/build_tools/py/toolchain.bzl b/build_tools/py/toolchain.bzl index c5ae73c2..621b56ed 100644 --- a/build_tools/py/toolchain.bzl +++ b/build_tools/py/toolchain.bzl @@ -77,7 +77,6 @@ def _dbx_py_toolchain_impl(ctx): pyc_compile_exe = ctx.executable.pyc_compile, pyc_compile_files_to_run = ctx.attr.pyc_compile[DefaultInfo].files_to_run, pyc_compilation_enabled = ctx.attr.pyc_compilation_enabled, - dbx_importer = ctx.attr.dbx_importer, ), ] @@ -87,7 +86,6 @@ dbx_py_toolchain = rule( "interpreter": attr.label(mandatory = True), "pyc_compile": attr.label(mandatory = True, executable = True, cfg = "host"), "pyc_compilation_enabled": attr.bool(default = True), - "dbx_importer": attr.label(), }, doc = """ Python toolchain. @@ -106,17 +104,12 @@ Attributes: - pyc_compilation_enabled: Optional. A boolean that affects whether or not pyc files will be generated with this toolchain. Default is True. - - dbx_importer: Optional. A py_library that's used to import - Dropbox's custom pyc files. - The toolchain returns the following fields: - interpreter: The dbx_py_interpreter for the build_tag. - pyc_compile_exe: The executable file for pyc_compile. - pyc_compile_files_to_run: The runfiles for pyc_compile. - pyc_compilation_enabled: Whether or not pyc files should be created - - dbx_importer: The importer attribute, or None if it's not passed - in. For some reason, executables don't contain the runfiles when you add them as an executable for a `ctx.actions.run` action. You need to make