Skip to content

Commit

Permalink
[3.12] gh-114763: Protect lazy loading modules from attribute access …
Browse files Browse the repository at this point in the history
…races (GH-114781) (GH-115870)

gh-114763: Protect lazy loading modules from attribute access races (GH-114781)

Setting the __class__ attribute of a lazy-loading module to ModuleType enables other threads to attempt to access attributes before the loading is complete. Now that is protected by a lock.
(cherry picked from commit 200271c)

Co-authored-by: Chris Markiewicz <[email protected]>
  • Loading branch information
miss-islington and effigies authored Feb 26, 2024
1 parent 53b84e7 commit 7b91b90
Show file tree
Hide file tree
Showing 3 changed files with 94 additions and 32 deletions.
81 changes: 51 additions & 30 deletions Lib/importlib/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

import _imp
import sys
import threading
import types


Expand Down Expand Up @@ -171,36 +172,54 @@ class _LazyModule(types.ModuleType):

def __getattribute__(self, attr):
"""Trigger the load of the module and return the attribute."""
# All module metadata must be garnered from __spec__ in order to avoid
# using mutated values.
# Stop triggering this method.
self.__class__ = types.ModuleType
# Get the original name to make sure no object substitution occurred
# in sys.modules.
original_name = self.__spec__.name
# Figure out exactly what attributes were mutated between the creation
# of the module and now.
attrs_then = self.__spec__.loader_state['__dict__']
attrs_now = self.__dict__
attrs_updated = {}
for key, value in attrs_now.items():
# Code that set the attribute may have kept a reference to the
# assigned object, making identity more important than equality.
if key not in attrs_then:
attrs_updated[key] = value
elif id(attrs_now[key]) != id(attrs_then[key]):
attrs_updated[key] = value
self.__spec__.loader.exec_module(self)
# If exec_module() was used directly there is no guarantee the module
# object was put into sys.modules.
if original_name in sys.modules:
if id(self) != id(sys.modules[original_name]):
raise ValueError(f"module object for {original_name!r} "
"substituted in sys.modules during a lazy "
"load")
# Update after loading since that's what would happen in an eager
# loading situation.
self.__dict__.update(attrs_updated)
__spec__ = object.__getattribute__(self, '__spec__')
loader_state = __spec__.loader_state
with loader_state['lock']:
# Only the first thread to get the lock should trigger the load
# and reset the module's class. The rest can now getattr().
if object.__getattribute__(self, '__class__') is _LazyModule:
# The first thread comes here multiple times as it descends the
# call stack. The first time, it sets is_loading and triggers
# exec_module(), which will access module.__dict__, module.__name__,
# and/or module.__spec__, reentering this method. These accesses
# need to be allowed to proceed without triggering the load again.
if loader_state['is_loading'] and attr.startswith('__') and attr.endswith('__'):
return object.__getattribute__(self, attr)
loader_state['is_loading'] = True

__dict__ = object.__getattribute__(self, '__dict__')

# All module metadata must be gathered from __spec__ in order to avoid
# using mutated values.
# Get the original name to make sure no object substitution occurred
# in sys.modules.
original_name = __spec__.name
# Figure out exactly what attributes were mutated between the creation
# of the module and now.
attrs_then = loader_state['__dict__']
attrs_now = __dict__
attrs_updated = {}
for key, value in attrs_now.items():
# Code that set an attribute may have kept a reference to the
# assigned object, making identity more important than equality.
if key not in attrs_then:
attrs_updated[key] = value
elif id(attrs_now[key]) != id(attrs_then[key]):
attrs_updated[key] = value
__spec__.loader.exec_module(self)
# If exec_module() was used directly there is no guarantee the module
# object was put into sys.modules.
if original_name in sys.modules:
if id(self) != id(sys.modules[original_name]):
raise ValueError(f"module object for {original_name!r} "
"substituted in sys.modules during a lazy "
"load")
# Update after loading since that's what would happen in an eager
# loading situation.
__dict__.update(attrs_updated)
# Finally, stop triggering this method.
self.__class__ = types.ModuleType

return getattr(self, attr)

def __delattr__(self, attr):
Expand Down Expand Up @@ -244,5 +263,7 @@ def exec_module(self, module):
loader_state = {}
loader_state['__dict__'] = module.__dict__.copy()
loader_state['__class__'] = module.__class__
loader_state['lock'] = threading.RLock()
loader_state['is_loading'] = False
module.__spec__.loader_state = loader_state
module.__class__ = _LazyModule
42 changes: 40 additions & 2 deletions Lib/test/test_importlib/test_lazy.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,12 @@
from importlib import abc
from importlib import util
import sys
import time
import threading
import types
import unittest

from test.support import threading_helper
from test.test_importlib import util as test_util


Expand Down Expand Up @@ -40,6 +43,7 @@ class TestingImporter(abc.MetaPathFinder, abc.Loader):
module_name = 'lazy_loader_test'
mutated_name = 'changed'
loaded = None
load_count = 0
source_code = 'attr = 42; __name__ = {!r}'.format(mutated_name)

def find_spec(self, name, path, target=None):
Expand All @@ -48,8 +52,10 @@ def find_spec(self, name, path, target=None):
return util.spec_from_loader(name, util.LazyLoader(self))

def exec_module(self, module):
time.sleep(0.01) # Simulate a slow load.
exec(self.source_code, module.__dict__)
self.loaded = module
self.load_count += 1


class LazyLoaderTests(unittest.TestCase):
Expand All @@ -59,8 +65,9 @@ def test_init(self):
# Classes that don't define exec_module() trigger TypeError.
util.LazyLoader(object)

def new_module(self, source_code=None):
loader = TestingImporter()
def new_module(self, source_code=None, loader=None):
if loader is None:
loader = TestingImporter()
if source_code is not None:
loader.source_code = source_code
spec = util.spec_from_loader(TestingImporter.module_name,
Expand Down Expand Up @@ -140,6 +147,37 @@ def test_module_already_in_sys(self):
# Force the load; just care that no exception is raised.
module.__name__

@threading_helper.requires_working_threading()
def test_module_load_race(self):
with test_util.uncache(TestingImporter.module_name):
loader = TestingImporter()
module = self.new_module(loader=loader)
self.assertEqual(loader.load_count, 0)

class RaisingThread(threading.Thread):
exc = None
def run(self):
try:
super().run()
except Exception as exc:
self.exc = exc

def access_module():
return module.attr

threads = []
for _ in range(2):
threads.append(thread := RaisingThread(target=access_module))
thread.start()

# Races could cause errors
for thread in threads:
thread.join()
self.assertIsNone(thread.exc)

# Or multiple load attempts
self.assertEqual(loader.load_count, 1)


if __name__ == '__main__':
unittest.main()
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Protect modules loaded with :class:`importlib.util.LazyLoader` from race
conditions when multiple threads try to access attributes before the loading
is complete.

0 comments on commit 7b91b90

Please sign in to comment.