Skip to content

Commit

Permalink
Improve scrubbing of namespace packages
Browse files Browse the repository at this point in the history
Problem: since we can't force python -S using /usr/bin/env, we can't prevent
importing site.  But 'import site' populates sys.modules with placeholder
modules containing .__path__ attributes with site-packages baked in.  This
means even if we scrub sys.path and sys.path_importer_cache, it's still
possible for __import__ to pick up things from site-packages.  This scrubs
sys.modules as well.

Reviewed at https://rbcommons.com/s/twitter/r/116/

(sapling split of a11f3bc4d0fa25aebf803ce2b511e7bff45f83f5)
  • Loading branch information
wickman committed Mar 18, 2014
1 parent 71bb9c3 commit 69649c1
Show file tree
Hide file tree
Showing 2 changed files with 93 additions and 74 deletions.
91 changes: 32 additions & 59 deletions src/python/twitter/common/python/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,29 +27,31 @@
import zipfile


_MKDTEMP_CLEANER = None
_MKDTEMP_DIRS = defaultdict(set)
_MKDTEMP_LOCK = threading.RLock()
# See http://stackoverflow.com/questions/2572172/referencing-other-modules-in-atexit
class MktempTeardownRegistry(object):
def __init__(self):
self._registry = defaultdict(set)
self._getpid = os.getpid
self._lock = threading.RLock()
self._exists = os.path.exists
self._rmtree = shutil.rmtree
atexit.register(self.teardown)

def __del__(self):
self.teardown()

def _mkdtemp_atexit_cleaner():
for td in _MKDTEMP_DIRS.pop(os.getpid(), []):
safe_rmtree(td)
def register(self, path):
with self._lock:
self._registry[self._getpid()].add(path)
return path

def teardown(self):
for td in self._registry.pop(self._getpid(), []):
if self._exists(td):
self._rmtree(td)

def _mkdtemp_unregister_cleaner():
global _MKDTEMP_CLEANER
_MKDTEMP_CLEANER = None


def _mkdtemp_register_cleaner(cleaner):
global _MKDTEMP_CLEANER
if not cleaner:
return
assert callable(cleaner)
if _MKDTEMP_CLEANER is None:
atexit.register(cleaner)
_MKDTEMP_CLEANER = cleaner
_MKDTEMP_SINGLETON = MktempTeardownRegistry()


@contextlib.contextmanager
Expand All @@ -61,32 +63,20 @@ def open_zip(path, *args, **kwargs):
yield zip


def safe_mkdtemp(cleaner=_mkdtemp_atexit_cleaner, **kw):
def safe_mkdtemp(**kw):
"""
Given the parameters to standard tempfile.mkdtemp, create a temporary directory
that is cleaned up on process exit.
"""
# proper lock sanitation on fork [issue 6721] would be desirable here.
with _MKDTEMP_LOCK:
return register_rmtree(tempfile.mkdtemp(**kw), cleaner=cleaner)
return _MKDTEMP_SINGLETON.register(tempfile.mkdtemp(**kw))


def register_rmtree(directory, cleaner=_mkdtemp_atexit_cleaner):
def register_rmtree(directory):
"""
Register an existing directory to be cleaned up at process exit.
"""
with _MKDTEMP_LOCK:
_mkdtemp_register_cleaner(cleaner)
_MKDTEMP_DIRS[os.getpid()].add(directory)
return directory


def safe_rmtree(directory):
"""
Delete a directory if it's present. If it's not present, no-op.
"""
if os.path.exists(directory):
shutil.rmtree(directory, True)
return _MKDTEMP_SINGLETON.register(directory)


def safe_mkdir(directory, clean=False):
Expand Down Expand Up @@ -123,6 +113,14 @@ def safe_delete(filename):
raise


def safe_rmtree(directory):
  """
  Delete a directory if it's present.  If it's not present, no-op.

  :param directory: Path of the directory tree to remove.
  """
  # ignore_errors=True already makes removal of a missing directory a no-op
  # (and matches the original's swallowing of removal errors), so we can skip
  # the exists() pre-check and avoid its check-then-act race.
  shutil.rmtree(directory, ignore_errors=True)


def chmod_plus_x(path):
"""
Equivalent of unix `chmod a+x path`
Expand Down Expand Up @@ -168,31 +166,6 @@ def touch(file, times=None):
os.utime(file, times)


@contextlib.contextmanager
def mutable_sys():
  """
  With-context that snapshots the mutable interpreter state (std streams,
  sys.argv, sys.path, import caches, sys.modules, __egginsert) on entry,
  yields the sys module for in-context mutation, and restores the snapshot
  on exit.  Attributes that did not exist on entry are deleted again if the
  context created them.
  """
  TRACKED_ATTRIBUTES = (
    'stdin', 'stdout', 'stderr',
    'argv', 'path', 'path_importer_cache', 'path_hooks',
    'modules', '__egginsert'
  )

  snapshot = {}
  absent = set()
  for name in TRACKED_ATTRIBUTES:
    if hasattr(sys, name):
      snapshot[name] = getattr(sys, name)
    else:
      absent.add(name)

  try:
    yield sys
  finally:
    # Put back every saved attribute, then strip any attribute that was
    # missing on entry but got created inside the context.
    for name, value in snapshot.items():
      setattr(sys, name, value)
    for name in absent:
      if hasattr(sys, name):
        delattr(sys, name)


class Chroot(object):
"""
A chroot of files overlayed from one directory to another directory.
Expand Down
76 changes: 61 additions & 15 deletions src/python/twitter/common/python/pex.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from site import USER_SITE
import sys

from .common import mutable_sys, safe_mkdir
from .common import safe_mkdir
from .compatibility import exec_function
from .environment import PEXEnvironment
from .interpreter import PythonInterpreter
Expand Down Expand Up @@ -87,19 +87,20 @@ def _site_libs(cls):
sysconfig.get_python_lib(plat_specific=True)])

@classmethod
def minimum_path(cls):
"""
Return as a tuple the emulated sys.path and sys.path_importer_cache of
a bare python installation, a la python -S.
"""
site_libs = set(cls._site_libs())
for site_lib in site_libs:
TRACER.log('Found site-library: %s' % site_lib)
for extras_path in cls._extras_paths():
TRACER.log('Found site extra: %s' % extras_path)
site_libs.add(extras_path)
site_libs = set(os.path.normpath(path) for path in site_libs)
def minimum_sys_modules(cls, site_libs):
  """
  Return a copy of sys.modules with every module whose __path__ points into
  one of ``site_libs`` removed, so namespace-package placeholders created by
  'import site' cannot leak site-packages entries back into __import__.
  """
  def rooted_in_site(module):
    # Ordinary modules have no __path__ and are always kept; only package
    # placeholders carrying site-packages search paths get scrubbed.
    search_paths = getattr(module, '__path__', ())
    return any(entry.startswith(lib) for entry in search_paths for lib in site_libs)

  survivors = {}
  for name, module in sys.modules.items():
    if rooted_in_site(module):
      TRACER.log('Scrubbing %s from sys.modules' % module)
    else:
      survivors[name] = module
  return survivors

@classmethod
def minimum_sys_path(cls, site_libs):
site_distributions = OrderedSet()
for path_element in sys.path:
if any(path_element.startswith(site_lib) for site_lib in site_libs):
Expand All @@ -122,9 +123,30 @@ def minimum_path(cls):
if key not in scrub_from_importer_cache)
return scrubbed_sys_path, scrubbed_importer_cache

@classmethod
def minimum_sys(cls):
  """Return the minimum sys necessary to run this interpreter, a la python -S.

  :returns: (sys.path, sys.path_importer_cache, sys.modules) tuple of a
    bare python installation.
  """
  site_libs = set(cls._site_libs())
  for lib in site_libs:
    TRACER.log('Found site-library: %s' % lib)
  for extra in cls._extras_paths():
    TRACER.log('Found site extra: %s' % extra)
    site_libs.add(extra)
  # Normalize so the startswith-based prefix checks in the scrubbers compare
  # like against like.
  site_libs = set(os.path.normpath(lib) for lib in site_libs)

  scrubbed_modules = cls.minimum_sys_modules(site_libs)
  scrubbed_path, scrubbed_importer_cache = cls.minimum_sys_path(site_libs)

  return scrubbed_path, scrubbed_importer_cache, scrubbed_modules

@classmethod
@contextmanager
def patch_pkg_resources(cls, working_set):
"""Patch pkg_resources given a new working set."""
def patch(working_set):
pkg_resources.working_set = working_set
pkg_resources.require = working_set.require
Expand All @@ -139,10 +161,34 @@ def patch(working_set):
finally:
patch(old_working_set)

@classmethod
@contextmanager
def patch_sys(cls):
  """With-context that installs a site-scrubbed sys.path,
  sys.path_importer_cache and sys.modules for the duration of the context,
  restoring the originals on exit."""
  def replace_in_place(target, contents):
    # Mutate rather than rebind so code holding references to the actual
    # sys structures observes the change.
    target.clear()
    target.update(contents)

  def install(path, importer_cache, modules):
    sys.path[:] = path
    replace_in_place(sys.path_importer_cache, importer_cache)
    replace_in_place(sys.modules, modules)

  backup = (sys.path[:], sys.path_importer_cache.copy(), sys.modules.copy())
  install(*cls.minimum_sys())
  try:
    yield
  finally:
    install(*backup)

def execute(self, args=()):
entry_point = self.entry()
with mutable_sys():
sys.path, sys.path_importer_cache = self.minimum_path()

with self.patch_sys():
working_set = self._env.activate()
if 'PEX_COVERAGE' in os.environ:
PEX.start_coverage()
Expand Down

0 comments on commit 69649c1

Please sign in to comment.