From 096d0097a09e439a4564531b297a998e5d74c9b5 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Tue, 14 Feb 2023 14:26:03 -0700 Subject: [PATCH] gh-101758: Add a Test For Single-Phase Init Module Variants (gh-101891) The new test exercises the most important variants for single-phase init extension modules. We also add some explanation about those variants to import.c. https://github.com/python/cpython/issues/101758 --- Lib/test/test_imp.py | 199 +++++++++++++++++++ Modules/_testsinglephase.c | 394 ++++++++++++++++++++++++++++++++++--- Python/import.c | 105 +++++++++- 3 files changed, 660 insertions(+), 38 deletions(-) diff --git a/Lib/test/test_imp.py b/Lib/test/test_imp.py index 80abc720c3251a..31dce21587e2ca 100644 --- a/Lib/test/test_imp.py +++ b/Lib/test/test_imp.py @@ -251,6 +251,205 @@ def test_issue16421_multiple_modules_in_one_dll(self): with self.assertRaises(ImportError): imp.load_dynamic('nonexistent', pathname) + @requires_load_dynamic + def test_singlephase_variants(self): + '''Exercise the most meaningful variants described in Python/import.c.''' + self.maxDiff = None + + basename = '_testsinglephase' + fileobj, pathname, _ = imp.find_module(basename) + fileobj.close() + + modules = {} + def load(name): + assert name not in modules + module = imp.load_dynamic(name, pathname) + self.assertNotIn(module, modules.values()) + modules[name] = module + return module + + def re_load(name, module): + assert sys.modules[name] is module + before = type(module)(module.__name__) + before.__dict__.update(vars(module)) + + reloaded = imp.load_dynamic(name, pathname) + + return before, reloaded + + def check_common(name, module): + summed = module.sum(1, 2) + lookedup = module.look_up_self() + initialized = module.initialized() + cached = sys.modules[name] + + # module.__name__ might not match, but the spec will. + self.assertEqual(module.__spec__.name, name) + if initialized is not None: + self.assertIsInstance(initialized, float) + self.assertGreater(initialized, 0) + self.assertEqual(summed, 3) + self.assertTrue(issubclass(module.error, Exception)) + self.assertEqual(module.int_const, 1969) + self.assertEqual(module.str_const, 'something different') + self.assertIs(cached, module) + + return lookedup, initialized, cached + + def check_direct(name, module, lookedup): + # The module has its own PyModuleDef, with a matching name. + self.assertEqual(module.__name__, name) + self.assertIs(lookedup, module) + + def check_indirect(name, module, lookedup, orig): + # The module re-uses another's PyModuleDef, with a different name. + assert orig is not module + assert orig.__name__ != name + self.assertNotEqual(module.__name__, name) + self.assertIs(lookedup, module) + + def check_basic(module, initialized): + init_count = module.initialized_count() + + self.assertIsNot(initialized, None) + self.assertIsInstance(init_count, int) + self.assertGreater(init_count, 0) + + return init_count + + def check_common_reloaded(name, module, cached, before, reloaded): + recached = sys.modules[name] + + self.assertEqual(reloaded.__spec__.name, name) + self.assertEqual(reloaded.__name__, before.__name__) + self.assertEqual(before.__dict__, module.__dict__) + self.assertIs(recached, reloaded) + + def check_basic_reloaded(module, lookedup, initialized, init_count, + before, reloaded): + relookedup = reloaded.look_up_self() + reinitialized = reloaded.initialized() + reinit_count = reloaded.initialized_count() + + self.assertIs(reloaded, module) + self.assertIs(reloaded.__dict__, module.__dict__) + # It only happens to be the same but that's good enough here. + # We really just want to verify that the re-loaded attrs + # didn't change. + self.assertIs(relookedup, lookedup) + self.assertEqual(reinitialized, initialized) + self.assertEqual(reinit_count, init_count) + + def check_with_reinit_reloaded(module, lookedup, initialized, + before, reloaded): + relookedup = reloaded.look_up_self() + reinitialized = reloaded.initialized() + + self.assertIsNot(reloaded, module) + self.assertIsNot(reloaded, module) + self.assertNotEqual(reloaded.__dict__, module.__dict__) + self.assertIs(relookedup, reloaded) + if initialized is None: + self.assertIs(reinitialized, None) + else: + self.assertGreater(reinitialized, initialized) + + # Check the "basic" module. + + name = basename + expected_init_count = 1 + with self.subTest(name): + mod = load(name) + lookedup, initialized, cached = check_common(name, mod) + check_direct(name, mod, lookedup) + init_count = check_basic(mod, initialized) + self.assertEqual(init_count, expected_init_count) + + before, reloaded = re_load(name, mod) + check_common_reloaded(name, mod, cached, before, reloaded) + check_basic_reloaded(mod, lookedup, initialized, init_count, + before, reloaded) + basic = mod + + # Check its indirect variants. + + name = f'{basename}_basic_wrapper' + expected_init_count += 1 + with self.subTest(name): + mod = load(name) + lookedup, initialized, cached = check_common(name, mod) + check_indirect(name, mod, lookedup, basic) + init_count = check_basic(mod, initialized) + self.assertEqual(init_count, expected_init_count) + + before, reloaded = re_load(name, mod) + check_common_reloaded(name, mod, cached, before, reloaded) + check_basic_reloaded(mod, lookedup, initialized, init_count, + before, reloaded) + + # Currently _PyState_AddModule() always replaces the cached module. + self.assertIs(basic.look_up_self(), mod) + self.assertEqual(basic.initialized_count(), expected_init_count) + + # The cached module shouldn't be changed after this point. + basic_lookedup = mod + + # Check its direct variant. + + name = f'{basename}_basic_copy' + expected_init_count += 1 + with self.subTest(name): + mod = load(name) + lookedup, initialized, cached = check_common(name, mod) + check_direct(name, mod, lookedup) + init_count = check_basic(mod, initialized) + self.assertEqual(init_count, expected_init_count) + + before, reloaded = re_load(name, mod) + check_common_reloaded(name, mod, cached, before, reloaded) + check_basic_reloaded(mod, lookedup, initialized, init_count, + before, reloaded) + + # This should change the cached module for _testsinglephase. + self.assertIs(basic.look_up_self(), basic_lookedup) + self.assertEqual(basic.initialized_count(), expected_init_count) + + # Check the non-basic variant that has no state. + + name = f'{basename}_with_reinit' + with self.subTest(name): + mod = load(name) + lookedup, initialized, cached = check_common(name, mod) + self.assertIs(initialized, None) + check_direct(name, mod, lookedup) + + before, reloaded = re_load(name, mod) + check_common_reloaded(name, mod, cached, before, reloaded) + check_with_reinit_reloaded(mod, lookedup, initialized, + before, reloaded) + + # This should change the cached module for _testsinglephase. + self.assertIs(basic.look_up_self(), basic_lookedup) + self.assertEqual(basic.initialized_count(), expected_init_count) + + # Check the basic variant that has state. + + name = f'{basename}_with_state' + with self.subTest(name): + mod = load(name) + lookedup, initialized, cached = check_common(name, mod) + self.assertIsNot(initialized, None) + check_direct(name, mod, lookedup) + + before, reloaded = re_load(name, mod) + check_common_reloaded(name, mod, cached, before, reloaded) + check_with_reinit_reloaded(mod, lookedup, initialized, + before, reloaded) + + # This should change the cached module for _testsinglephase. + self.assertIs(basic.look_up_self(), basic_lookedup) + self.assertEqual(basic.initialized_count(), expected_init_count) + @requires_load_dynamic def test_load_dynamic_ImportError_path(self): # Issue #1559549 added `name` and `path` attributes to ImportError diff --git a/Modules/_testsinglephase.c b/Modules/_testsinglephase.c index 3bfe159e54fe49..9e8dd647ee761a 100644 --- a/Modules/_testsinglephase.c +++ b/Modules/_testsinglephase.c @@ -5,74 +5,412 @@ # define Py_BUILD_CORE_MODULE 1 #endif +//#include #include "Python.h" #include "pycore_namespace.h" // _PyNamespace_New() +typedef struct { + _PyTime_t initialized; + PyObject *error; + PyObject *int_const; + PyObject *str_const; +} module_state; + +/* Process-global state is only used by _testsinglephase + since it's the only one that does not support re-init. */ +static struct { + int initialized_count; + module_state module; +} global_state = { .initialized_count = -1 }; + +static inline module_state * +get_module_state(PyObject *module) +{ + PyModuleDef *def = PyModule_GetDef(module); + if (def->m_size == -1) { + return &global_state.module; + } + else if (def->m_size == 0) { + return NULL; + } + else { + module_state *state = (module_state*)PyModule_GetState(module); + assert(state != NULL); + return state; + } +} + +static void +clear_state(module_state *state) +{ + state->initialized = 0; + Py_CLEAR(state->error); + Py_CLEAR(state->int_const); + Py_CLEAR(state->str_const); +} + +static int +_set_initialized(_PyTime_t *initialized) +{ + /* We go strictly monotonic to ensure each time is unique. */ + _PyTime_t prev; + if (_PyTime_GetMonotonicClockWithInfo(&prev, NULL) != 0) { + return -1; + } + /* We do a busy sleep since the interval should be super short. */ + _PyTime_t t; + do { + if (_PyTime_GetMonotonicClockWithInfo(&t, NULL) != 0) { + return -1; + } + } while (t == prev); + + *initialized = t; + return 0; +} + +static int +init_state(module_state *state) +{ + assert(state->initialized == 0 && + state->error == NULL && + state->int_const == NULL && + state->str_const == NULL); + + if (_set_initialized(&state->initialized) != 0) { + goto error; + } + assert(state->initialized > 0); + + /* Add an exception type */ + state->error = PyErr_NewException("_testsinglephase.error", NULL, NULL); + if (state->error == NULL) { + goto error; + } + + state->int_const = PyLong_FromLong(1969); + if (state->int_const == NULL) { + goto error; + } + + state->str_const = PyUnicode_FromString("something different"); + if (state->str_const == NULL) { + goto error; + } + + return 0; + +error: + clear_state(state); + return -1; +} + +static int +init_module(PyObject *module, module_state *state) +{ + if (PyModule_AddObjectRef(module, "error", state->error) != 0) { + return -1; + } + if (PyModule_AddObjectRef(module, "int_const", state->int_const) != 0) { + return -1; + } + if (PyModule_AddObjectRef(module, "str_const", state->str_const) != 0) { + return -1; + } + return 0; +} + + +PyDoc_STRVAR(common_initialized_doc, +"initialized()\n\ +\n\ +Return the seconds-since-epoch when the module was initialized."); + +static PyObject * +common_initialized(PyObject *self, PyObject *Py_UNUSED(ignored)) +{ + module_state *state = get_module_state(self); + if (state == NULL) { + Py_RETURN_NONE; + } + double d = _PyTime_AsSecondsDouble(state->initialized); + return PyFloat_FromDouble(d); +} + +#define INITIALIZED_METHODDEF \ + {"initialized", common_initialized, METH_NOARGS, \ + common_initialized_doc} + + +PyDoc_STRVAR(common_look_up_self_doc, +"look_up_self()\n\ +\n\ +Return the module associated with this module's def.m_base.m_index."); + +static PyObject * +common_look_up_self(PyObject *self, PyObject *Py_UNUSED(ignored)) +{ + PyModuleDef *def = PyModule_GetDef(self); + if (def == NULL) { + return NULL; + } + return Py_NewRef( + PyState_FindModule(def)); +} + +#define LOOK_UP_SELF_METHODDEF \ + {"look_up_self", common_look_up_self, METH_NOARGS, common_look_up_self_doc} + + /* Function of two integers returning integer */ -PyDoc_STRVAR(testexport_foo_doc, -"foo(i,j)\n\ +PyDoc_STRVAR(common_sum_doc, +"sum(i,j)\n\ \n\ Return the sum of i and j."); static PyObject * -testexport_foo(PyObject *self, PyObject *args) +common_sum(PyObject *self, PyObject *args) { long i, j; long res; - if (!PyArg_ParseTuple(args, "ll:foo", &i, &j)) + if (!PyArg_ParseTuple(args, "ll:sum", &i, &j)) return NULL; res = i + j; return PyLong_FromLong(res); } +#define SUM_METHODDEF \ + {"sum", common_sum, METH_VARARGS, common_sum_doc} -static PyMethodDef TestMethods[] = { - {"foo", testexport_foo, METH_VARARGS, - testexport_foo_doc}, - {NULL, NULL} /* sentinel */ -}; +PyDoc_STRVAR(basic_initialized_count_doc, +"initialized_count()\n\ +\n\ +Return how many times the module has been initialized."); + +static PyObject * +basic_initialized_count(PyObject *self, PyObject *Py_UNUSED(ignored)) +{ + assert(PyModule_GetDef(self)->m_size == -1); + return PyLong_FromLong(global_state.initialized_count); +} + +#define INITIALIZED_COUNT_METHODDEF \ + {"initialized_count", basic_initialized_count, METH_VARARGS, \ + basic_initialized_count_doc} + + +/*********************************************/ +/* the _testsinglephase module (and aliases) */ +/*********************************************/ + +/* This ia more typical of legacy extensions in the wild: + - single-phase init + - no module state + - does not support repeated initialization + (so m_copy is used) + - the module def is cached in _PyRuntime.extensions + (by name/filename) + + Also note that, because the module has single-phase init, + it is cached in interp->module_by_index (using mod->md_def->m_base.m_index). + */ -static struct PyModuleDef _testsinglephase = { +static PyMethodDef TestMethods_Basic[] = { + LOOK_UP_SELF_METHODDEF, + SUM_METHODDEF, + INITIALIZED_METHODDEF, + INITIALIZED_COUNT_METHODDEF, + {NULL, NULL} /* sentinel */ +}; + +static struct PyModuleDef _testsinglephase_basic = { PyModuleDef_HEAD_INIT, .m_name = "_testsinglephase", - .m_doc = PyDoc_STR("Test module _testsinglephase (main)"), + .m_doc = PyDoc_STR("Test module _testsinglephase"), .m_size = -1, // no module state - .m_methods = TestMethods, + .m_methods = TestMethods_Basic, }; +static PyObject * +init__testsinglephase_basic(PyModuleDef *def) +{ + if (global_state.initialized_count == -1) { + global_state.initialized_count = 0; + } + + PyObject *module = PyModule_Create(def); + if (module == NULL) { + return NULL; + } + + module_state *state = &global_state.module; + // It may have been set by a previous run or under a different name. + clear_state(state); + if (init_state(state) < 0) { + Py_CLEAR(module); + return NULL; + } + + if (init_module(module, state) < 0) { + Py_CLEAR(module); + goto finally; + } + + global_state.initialized_count++; + +finally: + return module; +} PyMODINIT_FUNC PyInit__testsinglephase(void) { - PyObject *module = PyModule_Create(&_testsinglephase); + return init__testsinglephase_basic(&_testsinglephase_basic); +} + + +PyMODINIT_FUNC +PyInit__testsinglephase_basic_wrapper(void) +{ + return PyInit__testsinglephase(); +} + + +PyMODINIT_FUNC +PyInit__testsinglephase_basic_copy(void) +{ + static struct PyModuleDef def = { + PyModuleDef_HEAD_INIT, + .m_name = "_testsinglephase_basic_copy", + .m_doc = PyDoc_STR("Test module _testsinglephase_basic_copy"), + .m_size = -1, // no module state + .m_methods = TestMethods_Basic, + }; + return init__testsinglephase_basic(&def); +} + + +/*******************************************/ +/* the _testsinglephase_with_reinit module */ +/*******************************************/ + +/* This ia less typical of legacy extensions in the wild: + - single-phase init (same as _testsinglephase above) + - no module state + - supports repeated initialization + (so m_copy is not used) + - the module def is not cached in _PyRuntime.extensions + + At this point most modules would reach for multi-phase init (PEP 489). + However, module state has been around a while (PEP 3121), + and most extensions predate multi-phase init. + + (This module is basically the same as _testsinglephase, + but supports repeated initialization.) + */ + +static PyMethodDef TestMethods_Reinit[] = { + LOOK_UP_SELF_METHODDEF, + SUM_METHODDEF, + INITIALIZED_METHODDEF, + {NULL, NULL} /* sentinel */ +}; + +static struct PyModuleDef _testsinglephase_with_reinit = { + PyModuleDef_HEAD_INIT, + .m_name = "_testsinglephase_with_reinit", + .m_doc = PyDoc_STR("Test module _testsinglephase_with_reinit"), + .m_size = 0, + .m_methods = TestMethods_Reinit, +}; + +PyMODINIT_FUNC +PyInit__testsinglephase_with_reinit(void) +{ + /* We purposefully do not try PyState_FindModule() first here + since we want to check the behavior of re-loading the module. */ + PyObject *module = PyModule_Create(&_testsinglephase_with_reinit); if (module == NULL) { return NULL; } - /* Add an exception type */ - PyObject *temp = PyErr_NewException("_testsinglephase.error", NULL, NULL); - if (temp == NULL) { - goto error; + assert(get_module_state(module) == NULL); + + module_state state = {0}; + if (init_state(&state) < 0) { + Py_CLEAR(module); + return NULL; } - if (PyModule_AddObject(module, "error", temp) != 0) { - Py_DECREF(temp); - goto error; + + if (init_module(module, &state) < 0) { + Py_CLEAR(module); + goto finally; } - if (PyModule_AddIntConstant(module, "int_const", 1969) != 0) { - goto error; +finally: + /* We only needed the module state for setting the module attrs. */ + clear_state(&state); + return module; +} + + +/******************************************/ +/* the _testsinglephase_with_state module */ +/******************************************/ + +/* This ia less typical of legacy extensions in the wild: + - single-phase init (same as _testsinglephase above) + - has some module state + - supports repeated initialization + (so m_copy is not used) + - the module def is not cached in _PyRuntime.extensions + + At this point most modules would reach for multi-phase init (PEP 489). + However, module state has been around a while (PEP 3121), + and most extensions predate multi-phase init. + */ + +static PyMethodDef TestMethods_WithState[] = { + LOOK_UP_SELF_METHODDEF, + SUM_METHODDEF, + INITIALIZED_METHODDEF, + {NULL, NULL} /* sentinel */ +}; + +static struct PyModuleDef _testsinglephase_with_state = { + PyModuleDef_HEAD_INIT, + .m_name = "_testsinglephase_with_state", + .m_doc = PyDoc_STR("Test module _testsinglephase_with_state"), + .m_size = sizeof(module_state), + .m_methods = TestMethods_WithState, +}; + +PyMODINIT_FUNC +PyInit__testsinglephase_with_state(void) +{ + /* We purposefully do not try PyState_FindModule() first here + since we want to check the behavior of re-loading the module. */ + PyObject *module = PyModule_Create(&_testsinglephase_with_state); + if (module == NULL) { + return NULL; } - if (PyModule_AddStringConstant(module, "str_const", "something different") != 0) { - goto error; + module_state *state = get_module_state(module); + assert(state != NULL); + if (init_state(state) < 0) { + Py_CLEAR(module); + return NULL; } - return module; + if (init_module(module, state) < 0) { + clear_state(state); + Py_CLEAR(module); + goto finally; + } -error: - Py_DECREF(module); - return NULL; +finally: + return module; } diff --git a/Python/import.c b/Python/import.c index 302255d76edcd5..63ed2443657b29 100644 --- a/Python/import.c +++ b/Python/import.c @@ -428,6 +428,71 @@ PyImport_GetMagicTag(void) } +/* +We support a number of kinds of single-phase init builtin/extension modules: + +* "basic" + * no module state (PyModuleDef.m_size == -1) + * does not support repeated init (we use PyModuleDef.m_base.m_copy) + * may have process-global state + * the module's def is cached in _PyRuntime.imports.extensions, + by (name, filename) +* "reinit" + * no module state (PyModuleDef.m_size == 0) + * supports repeated init (m_copy is never used) + * should not have any process-global state + * its def is never cached in _PyRuntime.imports.extensions + (except, currently, under the main interpreter, for some reason) +* "with state" (almost the same as reinit) + * has module state (PyModuleDef.m_size > 0) + * supports repeated init (m_copy is never used) + * should not have any process-global state + * its def is never cached in _PyRuntime.imports.extensions + (except, currently, under the main interpreter, for some reason) + +There are also variants within those classes: + +* two or more modules share a PyModuleDef + * a module's init func uses another module's PyModuleDef + * a module's init func calls another's module's init func + * a module's init "func" is actually a variable statically initialized + to another module's init func +* two or modules share "methods" + * a module's init func copies another module's PyModuleDef + (with a different name) +* (basic-only) two or modules share process-global state + +In the first case, where modules share a PyModuleDef, the following +notable weirdness happens: + +* the module's __name__ matches the def, not the requested name +* the last module (with the same def) to be imported for the first time wins + * returned by PyState_Find_Module() (via interp->modules_by_index) + * (non-basic-only) its init func is used when re-loading any of them + (via the def's m_init) + * (basic-only) the copy of its __dict__ is used when re-loading any of them + (via the def's m_copy) + +However, the following happens as expected: + +* a new module object (with its own __dict__) is created for each request +* the module's __spec__ has the requested name +* the loaded module is cached in sys.modules under the requested name +* the m_index field of the shared def is not changed, + so at least PyState_FindModule() will always look in the same place + +For "basic" modules there are other quirks: + +* (whether sharing a def or not) when loaded the first time, + m_copy is set before _init_module_attrs() is called + in importlib._bootstrap.module_from_spec(), + so when the module is re-loaded, the previous value + for __wpec__ (and others) is reset, possibly unexpectedly. + +Generally, when multiple interpreters are involved, some of the above +gets even messier. +*/ + /* Magic for extension modules (built-in as well as dynamically loaded). To prevent initializing an extension module more than once, we keep a static dictionary 'extensions' keyed by the tuple @@ -489,9 +554,8 @@ _extensions_cache_clear(void) Py_CLEAR(_PyRuntime.imports.extensions); } -int -_PyImport_FixupExtensionObject(PyObject *mod, PyObject *name, - PyObject *filename, PyObject *modules) +static int +fix_up_extension(PyObject *mod, PyObject *name, PyObject *filename) { if (mod == NULL || !PyModule_Check(mod)) { PyErr_BadInternalCall(); @@ -505,16 +569,13 @@ _PyImport_FixupExtensionObject(PyObject *mod, PyObject *name, } PyThreadState *tstate = _PyThreadState_GET(); - if (PyObject_SetItem(modules, name, mod) < 0) { - return -1; - } if (_PyState_AddModule(tstate, mod, def) < 0) { - PyMapping_DelItem(modules, name); return -1; } // bpo-44050: Extensions and def->m_base.m_copy can be updated // when the extension module doesn't support sub-interpreters. + // XXX Why special-case the main interpreter? if (_Py_IsMainInterpreter(tstate->interp) || def->m_size == -1) { if (def->m_size == -1) { if (def->m_base.m_copy) { @@ -541,15 +602,39 @@ _PyImport_FixupExtensionObject(PyObject *mod, PyObject *name, return 0; } +int +_PyImport_FixupExtensionObject(PyObject *mod, PyObject *name, + PyObject *filename, PyObject *modules) +{ + if (PyObject_SetItem(modules, name, mod) < 0) { + return -1; + } + if (fix_up_extension(mod, name, filename) < 0) { + PyMapping_DelItem(modules, name); + return -1; + } + return 0; +} + int _PyImport_FixupBuiltin(PyObject *mod, const char *name, PyObject *modules) { - int res; + int res = -1; PyObject *nameobj; nameobj = PyUnicode_InternFromString(name); - if (nameobj == NULL) + if (nameobj == NULL) { return -1; - res = _PyImport_FixupExtensionObject(mod, nameobj, nameobj, modules); + } + if (PyObject_SetItem(modules, nameobj, mod) < 0) { + goto finally; + } + if (fix_up_extension(mod, nameobj, nameobj) < 0) { + PyMapping_DelItem(modules, nameobj); + goto finally; + } + res = 0; + +finally: Py_DECREF(nameobj); return res; }