Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixes some bugs when using dump_session() with byref=True #463

Merged
merged 18 commits into from
May 1, 2022
Merged
Changes from 4 commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
b98cd44
fix `dump_session(byref=True)` bug when no objects are imported from…
leogama Apr 22, 2022
44a9e54
fix `dump_session(byref=True)` bug when the `multiprocessing` module …
leogama Apr 22, 2022
a252e44
Save objects imported with an alias and top level modules by referenc…
leogama Apr 25, 2022
8b15b99
Deal with top level functions with `dump_session()`
leogama Apr 26, 2022
47a060d
Added tests for load_session() and dump_session()
leogama Apr 28, 2022
6156dc5
fix singleton comparison, must be by identity, not by equality
leogama Apr 28, 2022
bf419f1
split tests to different files to better test session use cases
leogama Apr 29, 2022
1aef037
Fix error Py2.7 and Py3.7 where there is a tuple in sys.modules for s…
leogama Apr 29, 2022
abdfd5c
dump_session(): extra test for code coverage
leogama Apr 29, 2022
a87496b
dump_session and load_session: some minor improvements
leogama Apr 30, 2022
e4ba1e8
dump_session(): more tests
leogama Apr 30, 2022
095b4cb
dump_session(): dump modules other than __main__ by reference
leogama Apr 30, 2022
d1450bf
dump_session(): minor code coverage investigation
leogama Apr 30, 2022
f292584
dump_session() tests: adjustments
leogama Apr 30, 2022
5b90579
dump_session() tests: fix copyright notice
leogama Apr 30, 2022
1f310a9
dump_session() tests: merge test files using subprocess to test loadi…
leogama Apr 30, 2022
3ae5f1a
tests: Revert change. Test files are independent, should run in any o…
leogama May 1, 2022
0c1b1d1
dump_session() tests: use an unpickleable object available in PyPy
leogama May 1, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
82 changes: 59 additions & 23 deletions dill/_dill.py
Original file line number Diff line number Diff line change
Expand Up @@ -397,50 +397,79 @@ def loads(str, ignore=None, **kwds):
### End: Shorthands ###

### Pickle the Interpreter Session
SESSION_IMPORTED_AS_TYPES = (ModuleType, ClassType, TypeType, Exception,
FunctionType, MethodType, BuiltinMethodType)

def _module_map():
"""get map of imported modules"""
from collections import defaultdict
modmap = defaultdict(list)
from collections import defaultdict, namedtuple
modmap = namedtuple('Modmap', ['by_name', 'by_id', 'top_level'])
modmap = modmap(defaultdict(list), defaultdict(list), {})
items = 'items' if PY3 else 'iteritems'
for name, module in getattr(sys.modules, items)():
for modname, module in getattr(sys.modules, items)():
if module is None:
continue
for objname, obj in module.__dict__.items():
modmap[objname].append((obj, name))
if '.' not in modname:
modmap.top_level[id(module)] = modname
for objname, modobj in module.__dict__.items():
modmap.by_name[objname].append((modobj, modname))
modmap.by_id[id(modobj)].append((modobj, objname, modname))
return modmap

def _lookup_module(modmap, name, obj, main_module): #FIXME: needs work
"""lookup name if module is imported"""
for modobj, modname in modmap[name]:
if modobj is obj and modname != main_module.__name__:
return modname
def _lookup_module(modmap, name, obj, main_module):
"""lookup name or id of obj if module is imported"""
for modobj, modname in modmap.by_name[name]:
if modobj is obj and sys.modules[modname] is not main_module:
return modname, name
if isinstance(obj, SESSION_IMPORTED_AS_TYPES):
for modobj, objname, modname in modmap.by_id[id(obj)]:
if sys.modules[modname] is not main_module:
return modname, objname
return None, None

def _stash_modules(main_module):
modmap = _module_map()
imported = []
imported_as = []
imported_top_level = [] # keep separated for backwards compatibility
original = {}
items = 'items' if PY3 else 'iteritems'
for name, obj in getattr(main_module.__dict__, items)():
source_module = _lookup_module(modmap, name, obj, main_module)
# Avoid incorrectly matching a singleton value in another package (ex.: __doc__).
if obj in (None, False, True):
original[name] = obj
continue
source_module, objname = _lookup_module(modmap, name, obj, main_module)
if source_module:
imported.append((source_module, name))
if objname == name:
imported.append((source_module, name))
else:
imported_as.append((source_module, objname, name))
else:
original[name] = obj
try:
imported_top_level.append((modmap.top_level[id(obj)], name))
except KeyError:
original[name] = obj
if len(imported):
import types
newmod = types.ModuleType(main_module.__name__)
newmod = ModuleType(main_module.__name__)
newmod.__dict__.update(original)
newmod.__dill_imported = imported
newmod.__dill_imported_as = imported_as
newmod.__dill_imported_top_level = imported_top_level
return newmod
else:
return original
return main_module

def _restore_modules(main_module):
if '__dill_imported' not in main_module.__dict__:
return
imports = main_module.__dict__.pop('__dill_imported')
for module, name in imports:
exec("from %s import %s" % (module, name), main_module.__dict__)
try:
for modname, name in main_module.__dict__.pop('__dill_imported'):
main_module.__dict__[name] = __import__(modname, None, None, [name]).__dict__[name]
for modname, objname, name in main_module.__dict__.pop('__dill_imported_as'):
main_module.__dict__[name] = __import__(modname, None, None, [objname]).__dict__[objname]
for modname, name in main_module.__dict__.pop('__dill_imported_top_level'):
main_module.__dict__[name] = __import__(modname)
except KeyError:
pass

#NOTE: 06/03/15 renamed main_module to main
def dump_session(filename='/tmp/session.pkl', main=None, byref=False, **kwds):
Expand All @@ -453,13 +482,15 @@ def dump_session(filename='/tmp/session.pkl', main=None, byref=False, **kwds):
else:
f = open(filename, 'wb')
try:
pickler = Pickler(f, protocol, **kwds)
pickler._original_main = main
if byref:
main = _stash_modules(main)
pickler = Pickler(f, protocol, **kwds)
pickler._main = main #FIXME: dill.settings are disabled
pickler._byref = False # disable pickling by name reference
pickler._recurse = False # disable pickling recursion for globals
pickler._session = True # is best indicator of when pickling a session
pickler._main_modified = main is not pickler._original_main
pickler.dump(main)
finally:
if f is not filename: # If newly opened file
Expand Down Expand Up @@ -1849,6 +1880,7 @@ def save_function(pickler, obj):
_recurse = getattr(pickler, '_recurse', None)
_byref = getattr(pickler, '_byref', None)
_postproc = getattr(pickler, '_postproc', None)
_main_modified = getattr(pickler, '_main_modified', None)
postproc_list = []
if _recurse:
# recurse to get all globals referred to by obj
Expand All @@ -1863,8 +1895,12 @@ def save_function(pickler, obj):
else:
globs_copy = obj.__globals__ if PY3 else obj.func_globals

# If the globals is the __dict__ from the module being saved as a
# session, substitute it with the dictionary actually being saved.
if _main_modified and globs_copy is pickler._original_main.__dict__:
globs = globs_copy = pickler._main.__dict__
# If the globals is a module __dict__, do not save it in the pickle.
if globs_copy is not None and obj.__module__ is not None and \
elif globs_copy is not None and obj.__module__ is not None and \
getattr(_import_module(obj.__module__, True), '__dict__', None) is globs_copy:
globs = globs_copy
else:
Expand Down