diff --git a/hickle/__init__.py b/hickle/__init__.py
index 03d475e0..589d60c6 100644
--- a/hickle/__init__.py
+++ b/hickle/__init__.py
@@ -4,8 +4,7 @@
 from .hickle import *

 # All declaration
-__all__ = ['hickle']
-__all__.extend(hickle.__all__)
+__all__ = ['hickle', *hickle.__all__]

 # Author declaration
 __author__ = "Danny Price, Ellert van der Velden and contributors"
diff --git a/hickle/hickle.py b/hickle/hickle.py
index 3a99eed5..724d22f4 100644
--- a/hickle/hickle.py
+++ b/hickle/hickle.py
@@ -42,9 +42,9 @@
 from hickle import __version__
 from .helpers import PyContainer, NotHicklable, nobody_is_my_name
 from .lookup import (
-    hkl_types_dict, hkl_container_dict, load_loader, load_legacy_loader ,
     create_pickled_dataset, load_nothing, fix_lambda_obj_type,ReferenceManager,
-    link_dtype
+    LoaderManager, link_dtype
 )
@@ -138,7 +138,7 @@ def file_opener(f, path, mode='r'):
 # DUMPERS #
 ###########

-def _dump(py_obj, h_group, name, memo, attrs={} , **kwargs):
+def _dump(py_obj, h_group, name, memo, loader, attrs={}, **kwargs):
     """ Dump a python object to a group within an HDF5 file.

     This function is called recursively by the main dump() function.
@@ -171,7 +171,7 @@ def _dump(py_obj, h_group, name, memo, attrs={} , **kwargs):
     # Check if we have a unloaded loader for the provided py_obj and
     # retrive the most apropriate method for creating the corresponding
     # representation within HDF5 file
-    py_obj_type, (create_dataset, base_type,memoise) = load_loader(py_obj.__class__)
+    py_obj_type, (create_dataset, base_type, memoise) = loader.load_loader(py_obj.__class__)
     try:
         h_node,h_subitems = create_dataset(py_obj, h_group, name, **kwargs)
     except NotHicklable:
@@ -196,10 +196,10 @@ def _dump(py_obj, h_group, name, memo, attrs={} , **kwargs):
     # loop through list of all subitems and recursively dump them
     # to HDF5 file
     for h_subname,py_subobj,h_subattrs,sub_kwargs in h_subitems:
-        _dump(py_subobj,h_node,h_subname,memo,h_subattrs,**sub_kwargs)
+        _dump(py_subobj, h_node, h_subname, memo, loader, h_subattrs, **sub_kwargs)


-def dump(py_obj, file_obj, mode='w', path='/', **kwargs):
+def dump(py_obj, file_obj, mode='w', path='/', options={}, **kwargs):
     """
     Write a hickled representation of `py_obj` to the provided `file_obj`.
@@ -220,6 +220,17 @@ def dump(py_obj, file_obj, mode='w', path='/', **kwargs):
     path : str, optional
         Path within HDF5-file or group to save data to. Defaults to root ('/').

+    options (dict):
+        Each entry in this dict modifies how hickle dumps data to file.
+        For example
+            {'compact_expand': True}
+        enforces use of the compact_expand loader for all classes
+        registered with this kind of loader, whereas
+            {'compact_expand': False}
+        disables the compact_expand loader for the dumped data even if
+        it is globally turned on. More options may follow.
+
     kwargs : keyword arguments
         Additional keyword arguments that must be provided to the
         :meth:`~h5py.Group.create_dataset` method.
@@ -247,8 +258,9 @@ def dump(py_obj, file_obj, mode='w', path='/', **kwargs):
         h_root_group.attrs["HICKLE_VERSION"] = __version__
         h_root_group.attrs["HICKLE_PYTHON_VERSION"] = py_ver

-        with ReferenceManager.create_manager(h_root_group) as memo:
-            _dump(py_obj, h_root_group,'data', memo ,**kwargs)
+        with LoaderManager.create_manager(h_root_group, False, options) as loader:
+            with ReferenceManager.create_manager(h_root_group) as memo:
+                _dump(py_obj, h_root_group, 'data', memo, loader, **kwargs)
     finally:
         # Close the file if requested.
         # Closing a file twice will not cause any problems
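For callers, the new `options` argument is handed straight to `LoaderManager.create_manager`. A minimal usage sketch, assuming the optional `'custom'` loader set shipped with this change (note that `'compact_expand'` is named in the docstring above but is not yet listed in `optional_loaders`):

import numpy as np
import hickle

data = {'values': np.arange(8)}

# default dump: types without a registered loader fall back to pickle
hickle.dump(data, 'data.hkl', mode='w')

# opt in to the optional 'custom' loader set for this file only; the
# flag is persisted as an 'OPTION_CUSTOM' attribute on the root group,
# so loading the file re-activates the same loader set automatically
hickle.dump(data, 'data.hkl', mode='w', options={'custom': True})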
@@ -368,12 +380,14 @@ def load(file_obj, path='/', safe=True):
             # eventhough stated otherwise in documentation. Activate workarrounds
             # just in case issues arrise. Especially as corresponding lambdas in
             # load_numpy are not needed anymore and thus have been removed.
-            with ReferenceManager.create_manager(h_root_group,fix_lambda_obj_type) as memo:
-                _load(py_container, 'data',h_root_group['data'],memo,load_loader = load_legacy_loader)
+            with LoaderManager.create_manager(h_root_group, True) as loader:
+                with ReferenceManager.create_manager(h_root_group, fix_lambda_obj_type) as memo:
+                    _load(py_container, 'data', h_root_group['data'], memo, loader)
             return py_container.convert()

         # 4.1.x file and newer
-        with ReferenceManager.create_manager(h_root_group,pickle_loads) as memo:
-            _load(py_container, 'data',h_root_group['data'],memo,load_loader = load_loader)
+        with LoaderManager.create_manager(h_root_group, False) as loader:
+            with ReferenceManager.create_manager(h_root_group, pickle_loads) as memo:
+                _load(py_container, 'data', h_root_group['data'], memo, loader)
         return py_container.convert()

     # Else, raise error
@@ -390,7 +404,7 @@



-def _load(py_container, h_name, h_node,memo,load_loader = load_loader):
+def _load(py_container, h_name, h_node, memo, loader):
     """ Load a hickle file

     Recursive funnction to load hdf5 data into a PyContainer()
@@ -417,14 +431,14 @@ def _load(py_container, h_name, h_node,memo,load_loader = load_loader):

     # load the type information of node.
     py_obj_type,base_type,is_container = memo.resolve_type(h_node)
-    py_obj_type,(_,_,memoise) = load_loader(py_obj_type)
+    py_obj_type,(_,_,memoise) = loader.load_loader(py_obj_type)

     if is_container:
         # Either a h5py.Group representing the structure of complex objects or
         # a h5py.Dataset representing a h5py.Reference to the node of an object
         # referred to from multiple places within the objet structure to be dumped
-        py_container_class = hkl_container_dict.get(base_type,NoMatchContainer)
+        py_container_class = loader.hkl_container_dict.get(base_type,NoMatchContainer)
         py_subcontainer = py_container_class(h_node.attrs,base_type,py_obj_type)

         # NOTE: Sorting of container items according to their key Name is
         #       as loader has all the knowledge required to properly decide
         #       if sort is necessary and how to sort and at what stage to sort
         for h_key,h_subnode in py_subcontainer.filter(h_node):
-            _load(py_subcontainer, h_key, h_subnode, memo , load_loader)
+            _load(py_subcontainer, h_key, h_subnode, memo, loader)

         # finalize subitem
         sub_data = py_subcontainer.convert()
         py_container.append(h_name,sub_data,h_node.attrs)
     else:
         # must be a dataset load it and append to parent container
-        load_fn = hkl_types_dict.get(base_type, no_match_load)
+        load_fn = loader.hkl_types_dict.get(base_type, no_match_load)
         sub_data = load_fn(h_node,base_type,py_obj_type)
         py_container.append(h_name,sub_data,h_node.attrs)
     # store loaded object for properly restoring addtional references to it
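The `loader` threaded through `_load` resolves `base_type` strings against tables populated by loader modules. For orientation, a hypothetical `hickle/loaders/load_*.py` module only needs to export the two tables that `LoaderManager.load_loader` consumes in the changes below; all names here are illustrative, not part of this change:

# hypothetical hickle/loaders/load_mypkg.py -- a sketch only
from mypkg import Point   # assumed third-party class

def create_point_dataset(py_obj, h_group, name, **kwargs):
    d = h_group.create_dataset(name, data=(py_obj.x, py_obj.y), **kwargs)
    return d, ()           # created node plus an empty iterable of subitems

def load_point_dataset(h_node, base_type, py_obj_type):
    return py_obj_type(*h_node[()])

# rows mirror the positional arguments of LoaderManager.register_class
class_register = [
    (Point, b'point', create_point_dataset, load_point_dataset, None, True),
]
exclude_register = []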
diff --git a/hickle/loaders/__init__.py b/hickle/loaders/__init__.py
index e69de29b..f655528c 100644
--- a/hickle/loaders/__init__.py
+++ b/hickle/loaders/__init__.py
@@ -0,0 +1,23 @@
+# Names all optional loaders defined by any load_*.py file; will be
+# extended by any optional loader managed by the hickle core engine.
+# Names of optional_loaders must be all lower case. The corresponding
+# option attributes in the hickle file will be all upper case.
+optional_loaders = {
+    # Optional loader for custom loader methods and PyContainer classes.
+    # By marking them as the 'custom' option they are only activated if
+    # specified in a call to hickle.dump. If not specified, custom
+    # objects and classes will simply be stored as pickle strings. In
+    # that case the data may not be recoverable if the underlying
+    # classes are not available any more or have become incompatible
+    # due to disruptive changes. When dumped using the custom loader,
+    # hickle can at least try to restore the data as a numpy.array or
+    # a Python dict-like structure, with the metadata attached as is
+    # for further inspection.
+    'custom',
+}
+
+# prefix for optional_loaders attribute names which are all uppercase
+attribute_prefix = "OPTION_"
diff --git a/hickle/lookup.py b/hickle/lookup.py
index 988e463e..a3200821 100644
--- a/hickle/lookup.py
+++ b/hickle/lookup.py
@@ -45,19 +45,24 @@
 import warnings
 import types
 import io
+import re
 import operator
 import functools as ft
 import weakref
-from importlib.util import find_spec, module_from_spec
+import os.path
+from importlib.util import find_spec, module_from_spec, spec_from_file_location, spec_from_loader
+from importlib import invalidate_caches

 # Package imports
+import collections
 import dill as pickle
 import copyreg
 import numpy as np
 import h5py as h5

 # hickle imports
-from .helpers import PyContainer,not_dumpable,nobody_is_my_name,no_compression
+from .helpers import PyContainer, not_dumpable, nobody_is_my_name, no_compression, NotHicklable
+from .loaders import optional_loaders, attribute_prefix


 # %% GLOBALS
@@ -124,7 +129,6 @@
 class LookupError(Exception): # pragma: nocover
     """
     exception thrown if type lookup fails
     """
-
 class SerializedWarning(UserWarning): # pragma: nocover
     """ An object type was not understood
@@ -139,6 +143,44 @@
 class MockedLambdaWarning(UserWarning): # pragma: nocover
     """
     loadin properly
     """

+class AttemptRecoverCustom():
+    """
+    Dummy type indicating that the py_obj_type corresponding to an entry
+    in the hickle_types_table could not be restored. Most likely
+    pickle.loads encountered an ImportError/ModuleNotFoundError indicating
+    that the package and/or module defining the py_obj_type is not installed.
+
+    Before giving up and throwing an Exception, hickle tries to at least
+    recover the data and as much as possible of the metadata stored within
+    the h5py.Group or h5py.Dataset attributes.
+    """
+
+class RecoveredGroup(dict,AttemptRecoverCustom):
+    __slots__ = ('attrs',)
+
+    def __init__(self,*args,attrs={},**kwargs):
+        super().__init__(*args,**kwargs)
+        self.attrs = {name:value for name,value in attrs.items() if name not in {'type'}}
+
+class RecoveredDataset(np.ndarray,AttemptRecoverCustom):
+    __slots__ = ('attrs',)
+
+    def __new__(cls,input_array,dtype=None,attrs={}):
+        array_copy = np.array(input_array,dtype=dtype)
+        obj = super().__new__(
+            cls,
+            shape = array_copy.shape,
+            dtype = array_copy.dtype,
+            buffer = array_copy,
+            offset = 0,
+            strides = array_copy.strides,
+            order = 'C' if array_copy.flags.c_contiguous else 'F'
+        )
+        obj.attrs = {name:value for name,value in attrs.items() if name not in {'type'}}
+        return obj
+
+    def __array_finalize__(self,obj):
+        if obj is not None:
+            self.attrs = getattr(obj,'attrs',{})
+
 class ManagerMeta(type):
     """
     Metaclas for all manager classes derived from the BaseManager class.
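The two recovery containers behave like their plain counterparts with the original node attributes attached, minus the internal 'type' entry. A small sketch of the intended semantics, mirroring the new test added further below:

import numpy as np
from hickle.lookup import RecoveredGroup, RecoveredDataset

# the internal 'type' attribute is dropped, all other metadata is kept
group = RecoveredGroup({'a': 1}, attrs={'type': 42, 'world': 2})
assert group == {'a': 1} and group.attrs == {'world': 2}

data = RecoveredDataset(np.arange(4), dtype='int64',
                        attrs={'type': 42, 'unit': 'm'})
assert isinstance(data, np.ndarray)   # data stays usable as an ndarray
assert data.attrs == {'unit': 'm'}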
@@ -209,16 +251,40 @@ def create_manager(cls,h_node,create_entry):#h_node,pickle_loads = pickle.loads)
             created object. It may include further items specific to the
             actual subclass.

+        Raises:
+        -------
+        LookupError:
+            if a manager has already been created for h_node or its h_root_group
         """
         manager = cls.__managers__.get(h_node.file.id,None)
         if manager is not None:
-            raise ReferenceError("'{}' type manager already created for file '{}'".format(cls.__name__,h_node.file.filename))
+            raise LookupError("'{}' type manager already created for file '{}'".format(cls.__name__,h_node.file.filename))
         #root = ReferenceManager.get_root(h_node)
         table = cls.__managers__[h_node.file.id] = create_entry()
         weakref.finalize(table[0],cls._drop_manager,h_node.file.id)
         return table[0]

+    @classmethod
+    def get_manager(cls,h_node):
+        """
+        return the manager responsible for the file h_node is contained within
+
+        Parameters:
+        -----------
+        h_node (h5py.File, h5py.Group, h5py.Dataset):
+            the h5py node to obtain the responsible manager for
+
+        Raises:
+        -------
+        ReferenceError:
+            if no manager has been created yet for h_node or its h_root_group
+        """
+        try:
+            return cls.__managers__[h_node.file.id][0]
+        except KeyError:
+            raise ReferenceError("no managers exist for file '{}'".format(h_node.file.filename))

     def __init__(self):
         if type.mro(self.__class__)[0] is BaseManager:
             raise TypeError("'BaseManager' class must be subclassed")
@@ -242,14 +308,15 @@ class ReferenceManager(BaseManager,dict):
         with statement for example as follows:

         with ReferenceManager.create_manager(h_root_group) as memo:
-            _dump(data,h_root_group,'data',memo,**kwargs)
+            _dump(data,h_root_group,'data',memo,loader,**kwargs)

         with ReferenceManager.create_manager(h_root_group) as memo:
-            _load(py_container,'data',h_root_group['data'],memo,load_loader = load_loader)
+            _load(py_container,'data',h_root_group['data'],memo,loader)

         with ReferenceManager.create_manager(h_root_group,fix_lambda_obj_type) as memo:
-            _load(py_container,'data',h_root_group['data'],memo,load_loader = load_legacy_loader)
+            _load(py_container,'data',h_root_group['data'],memo,loader)
+
+    NOTE: for creating an appropriate loader object see LoaderManager
     """

     __slots__ = ('_py_obj_type_table','_py_obj_type_link','_base_type_link','_overlay','pickle_loads')
@@ -269,7 +336,7 @@ def get_root(h_node):
             entry = h_node.file.get(entry_ref,None)
             if entry is not None:
                 return entry.parent.parent
-        elif h_node.parent == h_node.file:
+        if h_node.parent == h_node.file:
             # h_node is either the h_root_group it self or the file node representing
             # the open hickle file.
             return h_node if isinstance(h_node,h5.Group) else h_node.file
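`get_manager` lets loader callbacks reach the manager that `create_manager` registered for an open file; `recover_custom_dataset` further down uses exactly this pattern. An illustrative sketch (the function name is hypothetical):

from hickle.lookup import ReferenceManager

def load_with_original_base_type(h_node, base_type, py_obj_type):
    # look up the manager created for this file via create_manager
    memo = ReferenceManager.get_manager(h_node)
    # base_type_type=2 yields the base_type recorded for the node
    # instead of the b'!recover!' marker used to select the loader
    _, original_base_type, _ = memo.resolve_type(h_node, base_type_type=2)
    return h_node[()], original_base_type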
@@ -284,7 +351,7 @@
             # 'type' seems to be a a byte string or string fallback to h_node.file
             return h_node.file
         try:
-            entry = h_node.parent.get(entry_ref,None)
+            entry = h_node.file.get(entry_ref,None)
         except ValueError:
             entry = None
         if entry is None:
@@ -324,15 +391,15 @@

         Raises:
         -------
-        ReferenceError:
-            ReferenceManager has already been created for h_node or its h_root_group
+        LookupError:
+            if a ReferenceManager has already been created for h_node or its h_root_group
         """
         root = ReferenceManager.get_root(h_node)
         def create_manager():
             return (ReferenceManager(h_node,pickle_loads = pickle_loads),ReferenceManager.get_root(h_node))
         return super().create_manager(h_node,create_manager)

-    def __init__(self,h_root_group,*args,pickle_loads = pickle.loads,**kwargs):
+    def __init__(self, h_root_group, *args, pickle_loads = pickle.loads, **kwargs):
         """
         constructs ReferenceManager object
@@ -397,7 +464,6 @@
                 self._base_type_link[base_type] = entry
                 self._base_type_link[entry.id] = base_type
                 continue
-            py_obj_type = pickle.loads(entry[()])
             base_type_ref = entry.attrs.get('base_type',None)
             if not isinstance(base_type_ref,h5.Reference):
                 raise ReferenceError("inconsistent 'hickle_types_table' entryies for py_obj_type '{}': no base_type".format(py_obj_type))
@@ -408,13 +474,19 @@
             base_type = self._base_type_link.get(base_type_entry.id,None)
             if base_type is None:
                 base_type = base_type_entry.name.rsplit('/',1)[-1].encode('ascii')
-            py_obj_type_id = id(py_obj_type)
-            self._py_obj_type_link[py_obj_type_id] = entry
-            self._py_obj_type_link[entry.id] = (py_obj_type_id,base_type)
+            try:
+                py_obj_type = pickle.loads(entry[()])
+            except (ImportError,AttributeError):
+                py_obj_type = AttemptRecoverCustom
+                entry_link = (py_obj_type, b'!recover!', base_type)
+            else:
+                entry_link = (py_obj_type, base_type)
+            self._py_obj_type_link[id(py_obj_type)] = entry
+            self._py_obj_type_link[entry.id] = entry_link

-    def store_type(self,h_node, py_obj_type, base_type = None,**kwargs):
+    def store_type(self, h_node, py_obj_type, base_type = None, attr_name = 'type', **kwargs):
         """
-        assings a 'py_obj_type' entry reference to the 'type' attribute
+        assigns a 'py_obj_type' entry reference to the attribute specified by attr_name
         of h_node and creates if not present the appropriate
         'hickle_types_table' entries for py_obj_type and base_type.
@@ -441,6 +513,10 @@
             will be crated for py_obj_type if not already present and a
             LookupError exception is raised instead.

+        attr_name (str):
+            the name of the attribute the type reference shall be stored to.
+            Defaults to 'type'
+
         kwargs (dict):
             keyword arguments to be passed to h5py.Group.create_dataset
             function when creating the entries for py_obj_type and base_type anew
@@ -499,9 +575,9 @@
             entry.attrs['base_type'] = base_entry.ref
             self._py_obj_type_link[py_obj_type_id] = entry
             self._py_obj_type_link[entry.id] = (py_obj_type,base_type)
-        h_node.attrs['type'] = entry.ref
+        h_node.attrs[attr_name] = entry.ref
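Taken together, the new `attr_name` parameters make `store_type` and `resolve_type` (below) a symmetric pair that can also manage type references held in non-default attributes. An illustrative round trip, with hypothetical names:

# memo is a ReferenceManager, h_node an h5py node, MyClass any class
memo.store_type(h_node, MyClass, b'custom', attr_name='compact_type')

py_obj_type, base_type, is_container = memo.resolve_type(
    h_node, attr_name='compact_type'
)
assert py_obj_type is MyClass and base_type == b'custom'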

-    def resolve_type(self,h_node):
+    def resolve_type(self, h_node, attr_name = 'type', base_type_type = 1):
         """
         resolves the py_obj_type and base_type pair referred to by the 'type'
         attribute and if present the 'base_type' attribute.
@@ -520,7 +596,16 @@
         Parameters:
         -----------
         h_node (h5py.Group,h5py.Dataset):
-            the node to resolve py_obj_type and base_type for
+            the node to resolve py_obj_type and base_type for, using the
+            reference stored in the attribute specified by attr_name
+
+        attr_name (str):
+            the name of the attribute the type reference shall be restored
+            from. Defaults to 'type'
+
+        base_type_type (int):
+            1 (default): the base_type used to select the loader
+            2: the original base_type corresponding to the not understood
+               py_obj_type of a recovered h5py.Group or h5py.Dataset

         Returns:
             tuple containing (py_obj_type,base_type,is_container)
@@ -530,7 +615,7 @@

         base_type:
             the base_type string indicating the loader to be used for properly
-            restoring the py_obj_type instance
+            restoring the py_obj_type instance, or the original base_type string

         is_container:
             booling flag indicating whether h_node represents a h5py.Group or
@@ -552,7 +637,7 @@
         # fix_lambda_obj_type below which will properly handle None value of type_ref
         # in any other case file is not a hickle 4.0.X legacy file and thus has to be
         # considered broken
-        type_ref = h_node.attrs.get('type',None)
+        type_ref = h_node.attrs.get(attr_name,None)
         if not isinstance(type_ref,h5.Reference):
             if type_ref is None:
                 try:
@@ -569,12 +654,14 @@
             try:
                 # set is_container_flag to True if h_node is h5py.Group type object and false otherwise
                 return self.pickle_loads(type_ref),h_node.attrs.get('base_type',b'pickle'),isinstance(h_node,h5.Group)
+            except (ImportError,AttributeError):
+                return AttemptRecoverCustom,(
+                    h_node.attrs.get('base_type',b'pickle') if base_type_type == 2 else b'!recover!'
+                ),isinstance(h_node,h5.Group)
             except (TypeError,pickle.UnpicklingError,EOFError):
-                raise ReferenceError("node '{}': 'type' attribute ('{}')invalid: not a pickle byte string".format(h_node.name,type_ref))
+                raise ReferenceError("node '{}': '{}' attribute ('{}') invalid: not a pickle byte string".format(h_node.name,attr_name,type_ref))
         try:
             entry = self._py_obj_type_table[type_ref]
         except (ValueError,KeyError):
-            raise ReferenceError("node '{}': 'type' attribute invalid: stale reference")
+            raise ReferenceError("node '{}': '{}' attribute invalid: stale reference".format(h_node.name,attr_name))

         # load (py_obj_type,base_type) pair from _py_obj_type_link for resolve
         # 'hickle_types_table' entry referred to by 'type' entry
@@ -597,12 +684,18 @@
             base_type = base_type_entry.name.rsplit('/',1)[-1].encode('ASCII')
             self._base_type_link[base_type] = base_type_entry
             self._base_type_link[base_type_entry.id] = base_type
-            py_obj_type = self.pickle_loads(entry[()])
-            self._py_obj_type_link[id(py_obj_type)] = entry
-            type_info = self._py_obj_type_link[entry.id] = (py_obj_type,base_type)
+            try:
+                py_obj_type = pickle.loads(entry[()])
+            except (ImportError,AttributeError):
+                py_obj_type = AttemptRecoverCustom
+                entry_link = (py_obj_type, b'!recover!', base_type)
+            else:
+                entry_link = (py_obj_type, base_type)
+            self._py_obj_type_link[id(py_obj_type)] = entry
+            type_info = self._py_obj_type_link[entry.id] = entry_link
         # return (py_obj_type,base_type). set is_container flag to true if
         # h_node is h5py.Group object and false otherwise
-        return (*type_info,isinstance(h_node,h5.Group))
+        return (type_info[0], type_info[base_type_type], isinstance(h_node,h5.Group))

     def __enter__(self):
         if not isinstance(self._py_obj_type_table,h5.Group) or not self._py_obj_type_table:
@@ -629,205 +722,462 @@ def __exit__(self,exc_type,exc_value,exc_traceback):

 #####################

 _managed_by_hickle = {'hickle',''}

-# This function registers a class to be used by hickle
-def register_class(myclass_type, hkl_str, dump_function=None, load_function=None, container_class=None,memoise = True):
-    """ Register a new hickle class.
-
-    Parameters:
-    -----------
-    myclass_type type(class): type of class
-    hkl_str (str): String to write to HDF5 file to describe class
-    dump_function (function def): function to write data to HDF5
-    load_function (function def): function to load data from HDF5
-    container_class (class def): proxy class to load data from HDF5
-    memoise (bool):
-        True: references to the object instances shall be remembered
-              during dump and load for properly resolving multiple
-              references to the same object instance.
-        False: every occurence of an instance of the object has to be dumped
-              and restored on load disregarding instances already present.
-
-    Raises:
-    -------
-    TypeError:
-        myclass_type represents a py_object the loader for which is to
-        be provided by hickle.lookup and hickle.hickle module only
-
-    """
+_custom_loader_enabled_builtins = {'__main__':('','')}
+
+class LoaderManager(BaseManager):
+    """
+    Handles the file specific lookup of the loader to be used to dump or load
+    a python object of a specific type

-    if (
-        myclass_type is object or
-        isinstance(
-            myclass_type,
-            (types.FunctionType,types.BuiltinFunctionType,types.MethodType,types.BuiltinMethodType)
-        ) or
-        issubclass(myclass_type,(type,_DictItem))
-    ):
-        # object as well als all kinds of functions and methods as well as all class objects and
-        # the special _DictItem class are to be handled by hickle core only.
-        dump_module = getattr(dump_function,'__module__','').split('.',2)
-        load_module = getattr(load_function,'__module__','').split('.',2)
-        container_module = getattr(container_class,'__module__','').split('.',2)
-        if {dump_module[0],load_module[0],container_module[0]} - _managed_by_hickle:
-            raise TypeError(
-                "loader for '{}' type managed by hickle only".format(
-                    myclass_type.__name__
-                )
-            )
-        if "loaders" in {*dump_module[1:2],*load_module[1:2],*container_module[1:2]}:
-            raise TypeError(
-                "loader for '{}' type managed by hickle core only".format(
-                    myclass_type.__name__
-                )
-            )
-    # add loader
-    if dump_function is not None:
-        types_dict[myclass_type] = ( dump_function, hkl_str,memoise)
-    if load_function is not None:
-        hkl_types_dict[hkl_str] = load_function
-    if container_class is not None:
-        hkl_container_dict[hkl_str] = container_class
+    To create a LoaderManager call the LoaderManager.create_manager
+    function. The value returned can and shall be used within a
+    with statement, for example as follows:
+
+        with LoaderManager.create_manager(h_root_group) as loader:
+            _dump(data,h_root_group,'data',memo,loader,**kwargs)

-def register_class_exclude(hkl_str_to_ignore):
-    """ Tell loading funciton to ignore any HDF5 dataset with attribute
-        'type=XYZ'
+        with LoaderManager.create_manager(h_root_group,False,{'compact_expand':True}) as loader:
+            _load(py_container,'data',h_root_group['data'],memo,loader)
+
+        with LoaderManager.create_manager(h_root_group,True) as loader:
+            _load(py_container,'data',h_root_group['data'],memo,loader)
+
+    NOTE: for creating an appropriate memo object see ReferenceManager
+    """

-    Args:
-        hkl_str_to_ignore (str): attribute type=string to ignore and exclude
-            from loading.
-    """
+    # available loaders

-    if hkl_str_to_ignore in disallowed_to_ignore:
-        raise ValueError(
-            "excluding '{}' base_type managed by hickle core not possible".format(
-                hkl_str_to_ignore
-            )
-        )
-    hkl_types_dict[hkl_str_to_ignore] = load_nothing
-    hkl_container_dict[hkl_str_to_ignore] = NoContainer
+    # Define dict of all acceptable types
+    __py_types__ = {
+        None: {},
+        'hickle-4.0': {},
+        **{ option:{} for option in optional_loaders }
+    }

+    # Define dict of all acceptable hickle types
+    __hkl_functions__ = {
+        None: {},
+        'hickle-4.0': {},
+        **{ option:{} for option in optional_loaders }
+    }

-def load_loader(py_obj_type, type_mro = type.mro):
-    """
-    Checks if given `py_obj` requires an additional loader to be handled
-    properly and loads it if so.
-
-    Parameters:
-    -----------
-    py_obj:
-        the Python object to find an appropriate loader for
+    # Define dict of all acceptable hickle container types
+    __hkl_container__ = {
+        None: {},
+        'hickle-4.0': {},
+        **{ option:{} for option in optional_loaders }
+    }

-    Returns:
-    --------
-    py_obj:
-        the Python object the loader was requested for
+    # Empty set of loaded loader names
+    __loaded_loaders__ = set()

-    (create_dataset,base_type,memoise):
-        tuple providing create_dataset function, name of base_type
-        used to represent py_obj and the boolean memoise flag
-        indicating whether loaded object shall be remembered
-        for restoring further references to it or must be loaded every time
-        encountered.
+    @classmethod
+    def register_class(cls, myclass_type, hkl_str, dump_function=None, load_function=None, container_class=None, memoise = True, option=None):
+        """ Register a new class to be recognized and dumped or restored by hickle.
+
+        Parameters:
+        -----------
+        myclass_type type(class):
+            type of class
+        hkl_str (str):
+            String to write to HDF5 file to describe class
+        dump_function (callable):
+            callable to write data to HDF5
+        load_function (callable):
+            function to load data from HDF5
+        container_class (class):
+            proxy class to load data from HDF5
+        memoise (bool):
+            True: references to the object instances shall be remembered
+                during dump and load for properly resolving multiple
+                references to the same object instance.
+            False: every occurence of an instance of the object has to be dumped
+                and restored on load disregarding instances already present.
+        option (str, None):
+            String identifying a set of loaders which shall only be used when
+            a specific feature or category is requested on top of the global
+            loaders. If None the loader is used globally whenever no other
+            loader is registered for myclass_type.
+
+        Raises:
+        -------
+        TypeError:
+            myclass_type represents a py_object the loader for which is to
+            be provided by hickle.lookup and hickle.hickle module only
+        ValueError:
+            if an optional loader tries to shadow loaders essential to the
+            proper function of hickle.dump and hickle.load
+            ('pickle', '!node-reference!')
+        LookupError:
+            if the option the loader shall belong to is unknown. Any new option
+            must be listed in 'optional_loaders' exported by the
+            'hickle.loaders.__init__.py' file to be recognized as a valid option
+        """
+
+        if (
+            myclass_type is object or
+            isinstance(
+                myclass_type,
+                (types.FunctionType,types.BuiltinFunctionType,types.MethodType,types.BuiltinMethodType)
+            ) or
+            issubclass(myclass_type,(type,_DictItem))
+        ):
+            # object as well as all kinds of functions and methods as well as all class objects and
+            # the special _DictItem class are to be handled by hickle core only.
+            dump_module = getattr(dump_function,'__module__','').split('.',2)
+            load_module = getattr(load_function,'__module__','').split('.',2)
+            container_module = getattr(container_class,'__module__','').split('.',2)
+            if {dump_module[0],load_module[0],container_module[0]} - _managed_by_hickle:
+                raise TypeError(
+                    "loader for '{}' type managed by hickle only".format(
+                        myclass_type.__name__
+                    )
+                )
+            if "loaders" in {*dump_module[1:2],*load_module[1:2],*container_module[1:2]}:
+                raise TypeError(
+                    "loader for '{}' type managed by hickle core only".format(
+                        myclass_type.__name__
+                    )
+                )
+        if option is not None and hkl_str in disallow_in_option:
+            raise ValueError("'{}' base_type may not be shadowed by option specific loader".format(hkl_str))
+        # add loader
+        try:
+            if dump_function is not None:
+                cls.__py_types__[option][myclass_type] = ( dump_function, hkl_str, memoise)
+            if load_function is not None:
+                cls.__hkl_functions__[option][hkl_str] = load_function
+            if container_class is not None:
+                cls.__hkl_container__[option][hkl_str] = container_class
+        except KeyError:
+            raise LookupError("Invalid option '{}' encountered".format(option))
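A short sketch of registering a loader under the optional 'custom' set; the class and both callables are illustrative and follow the signatures documented above:

from hickle.lookup import LoaderManager

class MyPoint:
    def __init__(self, x, y):
        self.x, self.y = x, y

def dump_point(py_obj, h_group, name, **kwargs):
    return h_group.create_dataset(name, data=(py_obj.x, py_obj.y), **kwargs), ()

def load_point(h_node, base_type, py_obj_type):
    return py_obj_type(*h_node[()])

# only active for files dumped with options={'custom': True}
LoaderManager.register_class(
    MyPoint, b'point', dump_point, load_point, None, True, 'custom')

# shadowing loaders essential to dump/load is rejected ...
#   LoaderManager.register_class(MyPoint, b'pickle', dump_point, load_point,
#                                None, True, 'custom')   # ValueError
# ... and so are unknown option names:
#   LoaderManager.register_class(MyPoint, b'point', dump_point, load_point,
#                                None, True, 'mine')     # LookupError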

+    @classmethod
+    def register_class_exclude(cls, hkl_str_to_ignore, option = None):
+        """ Tell the loading function to ignore any HDF5 dataset with attribute
+        'type=XYZ'
+
+        Parameters:
+        -----------
+        hkl_str_to_ignore (str): attribute type=string to ignore and exclude
+            from loading.
+        option (str, None):
+            String identifying the set of loaders from which the class shall
+            be excluded
+
+        Raises:
+        -------
+        ValueError:
+            class is managed by hickle core machinery and thus may not be ignored
+        LookupError:
+            if the option the loader shall belong to is unknown. Any new option
+            must be listed in 'optional_loaders' exported by the
+            'hickle.loaders.__init__.py' file to be recognized as a valid option
+        """
+
+        if hkl_str_to_ignore in disallowed_to_ignore:
+            raise ValueError(
+                "excluding '{}' base_type managed by hickle core not possible".format(
+                    hkl_str_to_ignore
+                )
+            )
+        try:
+            cls.__hkl_functions__[option][hkl_str_to_ignore] = load_nothing
+            cls.__hkl_container__[option][hkl_str_to_ignore] = NoContainer
+        except KeyError:
+            raise LookupError("'{}' option unknown".format(option))

+    __slots__ = ( 'types_dict','hkl_types_dict','hkl_container_dict','_mro','_file')

+    _option_formatter = '{}{{}}'.format(attribute_prefix)
+    _option_parser = re.compile(r'^{}(.*)$'.format(attribute_prefix),re.I)

+    def __init__(self, h_root_group, legacy = False, options = None):
+        """
+        constructs LoaderManager object
+
+        Parameters:
+        -----------
+        h_root_group (h5py.Group):
+            see LoaderManager.create_manager
+
+        legacy (bool):
+            if True the file h_node belongs to is in legacy hickle 4.0.X format;
+            ensure lambda py_obj_type strings are loaded properly and
+            'hickle-4.0' type loaders are included within 'types_dict',
+            'hkl_types_dict' and 'hkl_container_dict'
+
+        options (dict):
+            optional loaders to be loaded. Each key names one loader and
+            its value indicates whether it is to be used (True) or excluded (False)
+        """
+
+        # initialize lookup dictionaries with set of common loaders
+        self.types_dict = collections.ChainMap(self.__class__.__py_types__[None])
+        self.hkl_types_dict = collections.ChainMap(self.__class__.__hkl_functions__[None])
+        self.hkl_container_dict = collections.ChainMap(self.__class__.__hkl_container__[None])
+
+        # select source of optional loader flags. If options is None try to
+        # read options from the h_root_group.attrs structure.
+        # Otherwise use the content of the options dict and store each entry
+        # to be used within the h_root_group.attrs structure or update the entry there
+        if options is None:
+            option_items = (
+                match[1].lower()
+                for match,on in (
+                    ( LoaderManager._option_parser.match(name), value ) for name, value in h_root_group.attrs.items()
+                )
+                if match and on
+            )
+        else:
+            def set_option_items():
+                for option_key,on in options.items():
+                    if not on:
+                        continue
+                    h_root_group.attrs[LoaderManager._option_formatter.format(option_key.upper())] = on
+                    yield option_key
+            option_items = set_option_items()
+        # try to include loader set indicated by option_name
+        try:
+            for option_name in option_items:
+                self.types_dict.maps.insert(0,self.__class__.__py_types__[option_name])
+                self.hkl_types_dict.maps.insert(0,self.__class__.__hkl_functions__[option_name])
+                self.hkl_container_dict.maps.insert(0,self.__class__.__hkl_container__[option_name])
+        except KeyError:
+            raise LookupError("Option '{}' invalid".format(option_name))
+
+        # add loaders required to properly load legacy files created by hickle 4.0.X and
+        # ensure that non class types are properly reported by load_loader
+        if legacy:
+            self._mro = type_legacy_mro
+            self.types_dict.maps.insert(0,self.__class__.__py_types__['hickle-4.0'])
+            self.hkl_types_dict.maps.insert(0,self.__class__.__hkl_functions__['hickle-4.0'])
+            self.hkl_container_dict.maps.insert(0,self.__class__.__hkl_container__['hickle-4.0'])
+        else:
+            self._mro = type.mro
+        self._file = h_root_group.file
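The net effect is that option flags written once travel with the file. A sketch of the round trip, mirroring the new tests further below (the filename is illustrative):

import h5py
from hickle.lookup import LoaderManager

with h5py.File('options.h5', 'w') as f:
    root = f.create_group('root')

    # explicit options persist an 'OPTION_CUSTOM' attribute on the root
    writer = LoaderManager(root, False, {'custom': True})
    assert root.attrs['OPTION_CUSTOM']

    # options=None reads the flag back from the attrs and re-activates
    # the 'custom' loader tables on top of the global ones
    reader = LoaderManager(root, False, None)
    assert reader.types_dict.maps[0] is LoaderManager.__py_types__['custom']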
+
+    def load_loader(self, py_obj_type):
+        """
+        Checks if the given py_obj requires an additional loader to be handled
+        properly and loads it if so.
+
+        Parameters:
+        -----------
+        py_obj:
+            the Python object to find an appropriate loader for
+
+        Returns:
+        --------
+        py_obj:
+            the Python object the loader was requested for
+
+        (create_dataset,base_type,memoise):
+            tuple providing the create_dataset function, the name of the
+            base_type used to represent py_obj and the boolean memoise flag
+            indicating whether the loaded object shall be remembered for
+            restoring further references to it or must be loaded every time
+            it is encountered.
+
+        Raises:
+        -------
+        RuntimeError:
+            in case the py object is defined by hickle core machinery.
+        """
+
+        # any function or method object, any class object will be passed to pickle
+        # ensure that in any case create_pickled_dataset is called
+
+        # get the class type of py_obj and loop over the entire mro_list
+        types_dict = self.types_dict
+        loaded_loaders = self.__class__.__loaded_loaders__
+        for mro_item in self._mro(py_obj_type):
+            # Check if mro_item can be found in types_dict and return if so
+            loader_item = types_dict.get(mro_item,None)
+            if loader_item is not None:
+                return py_obj_type,loader_item
+
+            # Obtain the package name of mro_item
+            package_list = mro_item.__module__.split('.',2)
+
+            package_file = None
+            if package_list[0] == 'hickle':
+                if package_list[1] != 'loaders':
+                    print(mro_item,package_list)
+                    raise RuntimeError(
+                        "objects defined by hickle core must be registered"
+                        " before first dump or load"
+                    )
+                if (
+                    len(package_list) < 3 or
+                    not package_list[2].startswith("load_") or
+                    '.' in package_list[2][5:]
+                ):
+                    warnings.warn(
+                        "ignoring '{!r}' dummy type not defined by loader module".format(py_obj_type),
+                        RuntimeWarning
+                    )
+                    continue
+                # dummy objects are not dumpable ensure that future lookups return that result
+                loader_item = types_dict.get(mro_item,None)
+                if loader_item is None:
+                    loader_item = types_dict[mro_item] = ( not_dumpable, b'NotHicklable',False )
+                # ensure module of mro_item is loaded as loader as it will contain
+                # loader which knows how to handle group or dataset with dummy as
+                # py_obj_type
+                loader_name = mro_item.__module__
+                if loader_name in loaded_loaders:
+                    # loader already loaded as triggered by dummy; abort search and
+                    # return what was found so far, as falling back to further bases
+                    # does not make sense
+                    return py_obj_type,loader_item
+            else:
+                loader_name,package_file = _custom_loader_enabled_builtins.get(package_list[0],(None,''))
+                if loader_name is None:
+                    # construct the name of the associated loader
+                    loader_name = 'hickle.loaders.load_{:s}'.format(package_list[0])
+                elif not loader_name:
+                    package_module = sys.modules.get(package_list[0],None)
+                    if package_module is None:
+                        # TODO print warning that package module was unloaded from python
+                        # since mro_item was created
+                        continue
+                    package_file = getattr(package_module,'__file__',None)
+                    if package_file is None:
+                        package_loader = getattr(package_module,'__loader__',None)
+                        if package_loader is None: # pragma: nocover
+                            continue
+                        package_spec = spec_from_loader(package_list[0],package_loader)
+                        if not getattr(package_spec,'has_location',False):
+                            continue
+                        package_file = package_spec.origin
+                    if not os.path.isabs(package_file): # pragma: nocover
+                        package_spec = find_spec(os.path.basename(package_file.rsplit('.')[0]))
+                        if not package_spec.has_location: # pragma: nocover
+                            continue
+                        package_file = package_spec.origin
+                    package_list[0],allow_custom_loader = os.path.basename(package_file).rsplit('.')[0],package_list[0]
+                    loader_name = 'hickle.loaders.load_{:s}'.format(package_list[0])
+                    _custom_loader_enabled_builtins[allow_custom_loader] = loader_name, package_file
+
+            # Check if this module is already loaded, and return if so
+            if loader_name in loaded_loaders:
+                # loader is loaded but does not define a loader for mro_item
+                # check next base class
+                continue
+
+            # check if the loader module has already been imported. If so use
+            # that instead of importing it anew
+            loader = sys.modules.get(loader_name,None)
+            if loader is None:
+                loader_spec = find_spec(loader_name)
+                if loader_spec is None:
+                    if package_file is None: # pragma: nocover
+                        # not sure if hit at all
+                        continue
+                    if not package_file:
+                        package_spec = getattr(sys.modules.get(package_list[0],None),'__spec__',None)
+                        if package_spec is None: # pragma: nocover
+                            # not sure if hit at all
+                            package_spec = find_spec(package_list[0])
+                        if not package_spec.has_location:
+                            # can't resolve package or base module hosting mro_item
+                            continue
+                        package_file = package_spec.origin # pragma: nocover
+                    package_path = os.path.dirname(package_file)
+                    package_loader_path = os.path.join(package_path, "hickle_loaders", "load_{:s}.py".format(package_list[0]))
+                    try:
+                        fid = open(package_loader_path,'rb')
+                    except FileNotFoundError: # pragma: nocover
+                        try:
+                            package_loader_path += 'c'
+                            fid = open(package_loader_path,'rb')
+                        except FileNotFoundError:
+                            # no file for loader module found
+                            continue
+                        else:
+                            fid.close()
+                    else:
+                        fid.close()
+                    loader_spec = spec_from_file_location(loader_name,package_loader_path)
+                # import the loader module described by loader_spec;
+                # any import errors and exceptions at this stage result from
+                # errors inside the module and not because the loader module
+                # does not exist
+                loader = module_from_spec(loader_spec)
+                loader_spec.loader.exec_module(loader)
+                sys.modules[loader_name] = loader
+
+            # load all loaders defined by loader module
+            # no performance benefit of starmap or map if required to build
+            # list or tuple of None's returned
+            for next_loader in loader.class_register:
+                self.register_class(*next_loader)
+            for drop_loader in loader.exclude_register:
+                self.register_class_exclude(drop_loader)
+            loaded_loaders.add(loader_name)
+
+            # check if loader module defines a loader for base_class mro_item
+            loader_item = types_dict.get(mro_item,None)
+            if loader_item is not None:
+                # return loader for base_class mro_item
+                return py_obj_type,loader_item
+            # the new loader does not define a loader for mro_item
+            # check next base class
+
+        # no appropriate loader found; return fallback to pickle
+        return py_obj_type,(create_pickled_dataset,b'pickle',True)
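Because of the `package_path/hickle_loaders/load_<package>.py` probe above, third-party packages can ship their own loader next to their code without touching hickle itself. A sketch of the expected layout, with `mypkg` as a stand-in name:

# expected on-disk layout (illustrative):
#
#   mypkg/
#       __init__.py           # defines the classes being dumped
#       hickle_loaders/
#           load_mypkg.py     # found and imported by load_loader above
#
# load_mypkg.py exports the same tables as a bundled loader module:
class_register = [
    # (py_obj_type, base_type, dump_function, load_function,
    #  container_class, memoise[, option])
]
exclude_register = []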

-        # Check if this module is already loaded, and return if so
-        if loader_name in loaded_loaders:
-            # loader is loaded but does not define loader for mro_item
-            # check next base class
-            continue
-
-        # check if loader module has already been loaded. If use that instead
-        # of importing it anew
-        loader = sys.modules.get(loader_name,None)
-        if loader is None:
-            # Try to load a loader with this name
-            loader_spec = find_spec(loader_name)
-            if loader_spec is None:
-
-                # no module sepecification found for module
-                # check next base class
-                continue
-            # import the the loader module described by module_spec
-            # any import errors and exceptions result at this stage from
-            # errors inside module and not cause loader module does not
-            # exists
-            loader = module_from_spec(loader_spec)
-            loader_spec.loader.exec_module(loader)
-            sys.modules[loader_name] = loader
-
-        # load all loaders defined by loader module
-        # no performance benefit of starmap or map if required to build
-        # list or tuple of None's returned
-        for next_loader in loader.class_register:
-            register_class(*next_loader)
-        for drop_loader in loader.exclude_register:
-            register_class_exclude(drop_loader)
-        loaded_loaders.add(loader_name)
-
-        # check if loader module defines a loader for base_class mro_item
-        loader_item = types_dict.get(mro_item,None)
-        if loader_item is not None:
-            # return loader for base_class mro_item
-            return py_obj_type,loader_item
-        # the new loader does not define loader for mro_item
-        # check next base class
-
-    # no appropriate loader found return fallback to pickle
-    return py_obj_type,(create_pickled_dataset,b'pickle',True)
+    @classmethod
+    def create_manager(cls, h_node, legacy = False, options = None):
+        """
+        creates a new LoaderManager object for the h_root_group the h_node
+        belongs to.
+
+        Parameters:
+        -----------
+        h_node (h5py.Group, h5py.Dataset):
+            the h5py node or its h_root_group to create a new LoaderManager
+            object for.
+
+        legacy (bool):
+            if True the file h_node belongs to is in legacy hickle 4.0.X format;
+            ensure lambda py_obj_type strings are loaded properly and
+            'hickle-4.0' type loaders are included within 'types_dict',
+            'hkl_types_dict' and 'hkl_container_dict'
+
+        options (dict):
+            optional loaders to be loaded.
+            Each key names one loader and its value indicates whether it is
+            to be used (True) or excluded (False)
+
+        Raises:
+        -------
+        LookupError:
+            if a LoaderManager has already been created for h_node or its
+            h_root_group
+        """
+
+        def create_manager():
+            return (LoaderManager(h_node,legacy,options),)
+        return super().create_manager(h_node,create_manager)
+
+    def __enter__(self):
+        if not isinstance(self._file,h5.File) or not self._file:
+            raise RuntimeError("Stale LoaderManager, call LoaderManager.create_manager to create a new one")
+        return self
+
+    def __exit__(self,exc_type,exc_value,exc_traceback):
+        if not isinstance(self._file,h5.File) or not self._file:
+            return
+        super().__exit__(exc_type,exc_value,exc_traceback,self._file)
+        self._file = None
+        self._mro = None
+        self.types_dict = None
+        self.hkl_types_dict = None
+        self.hkl_container_dict = None
+

 def type_legacy_mro(cls):
     """
     drop in replacement of type.mro for loading legacy hickle 4.0.x files which were
@@ -852,13 +1202,16 @@ def type_legacy_mro(cls):
         return (cls,)
     return type.mro(cls)

-load_legacy_loader = ft.partial(load_loader,type_mro = type_legacy_mro)

 # %% BUILTIN LOADERS (not maskable)

 # list of below hkl_types which may not be ignored
 disallowed_to_ignore = {b'dict_item',b'pickle',b'!node-reference!',b'moc_lambda'}

+# list of below hkl_types which may not be redefined by an optional loader
+disallow_in_option = {b'!node-reference!',b'pickle'}
+

 class NoContainer(PyContainer): # pragma: nocover
     """ load nothing container
@@ -877,7 +1230,7 @@ class _DictItemContainer(PyContainer):
     def convert(self):
         return self._content[0]

-register_class(_DictItem, b'dict_item',dump_nothing,load_nothing,_DictItemContainer)
+LoaderManager.register_class(_DictItem, b'dict_item',dump_nothing,load_nothing,_DictItemContainer,False,'hickle-4.0')


 class ExpandReferenceContainer(PyContainer):
@@ -910,7 +1263,7 @@ def convert(self):

 # objects created by resolving h5py.Reference datasets are already stored inside
 # memo dictionary so no need to remoise them.
-register_class(NodeReference,b'!node-reference!',dump_nothing,load_nothing,ExpandReferenceContainer,False)
+LoaderManager.register_class(NodeReference,b'!node-reference!',dump_nothing,load_nothing,ExpandReferenceContainer,False)


 def create_pickled_dataset(py_obj, h_group, name, reason = None, **kwargs):
@@ -919,9 +1272,16 @@ def create_pickled_dataset(py_obj, h_group, name, reason = None, **kwargs):
         structure. In case raise a warning and convert to pickle string.

     Args:
-        py_obj: python object to dump; default if item is not matched.
-        h_group (h5.File.group): group to dump data into.
-        name (str): the name of the resulting dataset
+        py_obj:
+            python object to dump; default if item is not matched.
+        h_group (h5.File.group):
+            group to dump data into.
+        name (str):
+            the name of the resulting dataset
+        reason (str,None):
+            reason why py_obj has to be pickled, e.g. the string
+            provided by the NotHicklable exception
     """

     # for what ever reason py_obj could not be successfully reduced
@@ -943,13 +1303,47 @@ def load_pickled_data(h_node, base_type, py_obj_type):
     """
     loade pickle string and return resulting py_obj
     """
-    return pickle.loads(h_node[()])
+    try:
+        return pickle.loads(h_node[()])
+    except (ImportError,AttributeError):
+        return RecoveredDataset(h_node[()],dtype = h_node.dtype,attrs = dict(h_node.attrs))


 # no dump method is registered for object as this is the default for
 # any unknown object and for classes, functions and methods
-register_class(object,b'pickle',None,load_pickled_data)
+LoaderManager.register_class(object,b'pickle',None,load_pickled_data)
+
+
+def recover_custom_dataset(h_node,base_type,py_obj_type):
+    manager = ReferenceManager.get_manager(h_node)
+    _,base_type,_ = manager.resolve_type(h_node,base_type_type = 2)
+    attrs = dict(h_node.attrs)
+    attrs['base_type'] = base_type
+    return RecoveredDataset(h_node[()],dtype=h_node.dtype,attrs=attrs)
+
+
+class RecoverGroupContainer(PyContainer):
+    def __init__(self,h5_attrs, base_type, object_type):
+        super().__init__(h5_attrs, base_type, object_type,_content = {})
+
+    def filter(self,h_parent):
+        manager = ReferenceManager.get_manager(h_parent)
+        _,self.base_type,_ = manager.resolve_type(h_parent,base_type_type = 2)
+        yield from h_parent.items()
+
+    def append(self,name,item,h5_attrs):
+        if isinstance(item,AttemptRecoverCustom):
+            self._content[name] = item
+        else:
+            self._content[name] = (item,{ key:value for key,value in h5_attrs.items() if key not in {'type'}})
+
+    def convert(self):
+        attrs = {key:value for key,value in self._h5_attrs.items() if key not in {'type'}}
+        attrs['base_type'] = self.base_type
+        return RecoveredGroup(self._content,attrs=attrs)
+
+
+LoaderManager.register_class(AttemptRecoverCustom,b'!recover!',None,recover_custom_dataset,RecoverGroupContainer,True)


 def _moc_numpy_array_object_lambda(x):
@@ -966,7 +1360,7 @@ def _moc_numpy_array_object_lambda(x):
     """
     return x[0]

-register_class(_moc_numpy_array_object_lambda,b'moc_lambda',dump_nothing,load_nothing)
+LoaderManager.register_class(_moc_numpy_array_object_lambda,b'moc_lambda',dump_nothing,load_nothing,None,True,'hickle-4.0')

 def fix_lambda_obj_type(bytes_object, *, fix_imports=True, encoding="ASCII", errors="strict"):
     """
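With these pieces in place, loading a file whose custom classes can no longer be imported degrades gracefully instead of raising. A hedged sketch of the observable behaviour (the filename is illustrative):

import hickle

# a file dumped with options={'custom': True} by an environment whose
# classes are not importable here
obj = hickle.load('made_elsewhere.hkl')

# instead of an ImportError the result is a RecoveredGroup or
# RecoveredDataset: the raw data plus the surviving attrs, including
# the original base_type, for manual inspection
print(type(obj).__name__, getattr(obj, 'attrs', None))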
diff --git a/hickle/tests/test_02_hickle_lookup.py b/hickle/tests/test_02_hickle_lookup.py
index 505123e4..2c10ce08 100644
--- a/hickle/tests/test_02_hickle_lookup.py
+++ b/hickle/tests/test_02_hickle_lookup.py
@@ -15,15 +15,19 @@
 import weakref

 # Package imports
+import collections
 import numpy as np
 import h5py
 import dill as pickle
-from importlib.util import find_spec
+from importlib.util import find_spec,spec_from_loader,spec_from_file_location
 from importlib import reload
+from copy import copy
+import os.path
 from py.path import local

 # hickle imports
 from hickle.helpers import PyContainer,not_dumpable
+from hickle.loaders import optional_loaders, attribute_prefix
 import hickle.lookup as lookup

 # Set current working directory to the temporary directory
@@ -73,6 +77,9 @@ def loader_table():
     lookup.types_dict.clear()
     lookup.hkl_types_dict.clear()
     lookup.hkl_container_dict.clear()
+    for per_option in lookup.LoaderManager.__py_types__.values():
+        per_option.clear()
+    for per_option in lookup.LoaderManager.__hkl_functions__.values():
+        per_option.clear()
+    for per_option in lookup.LoaderManager.__hkl_container__.values():
+        per_option.clear()

     # simulate loader definitions found within loader modules
     def create_test_dataset(myclass_type,h_group,name,**kwargs):
@@ -114,7 +121,7 @@ class IsHickleCore(TestContainer):
         (tuple,b'tuple',None,load_test_dataset,TestContainer),
         (lookup._DictItem,b'dict_item',None,None,NotHicklePackage),
         (lookup._DictItem,b'pickle',None,None,HickleLoadersModule),
-        (lookup._DictItem,b'dict_item',lookup.register_class,None,IsHickleCore)
+        (lookup._DictItem,b'dict_item',lookup.LoaderManager.register_class,None,IsHickleCore)
     ]

     # cleanup and reload hickle.lookup module to reset it to its initial state
@@ -124,6 +131,9 @@
     lookup.types_dict.clear()
     lookup.hkl_types_dict.clear()
     lookup.hkl_container_dict.clear()
+    for per_option in lookup.LoaderManager.__py_types__.values():
+        per_option.clear()
+    for per_option in lookup.LoaderManager.__hkl_functions__.values():
+        per_option.clear()
+    for per_option in lookup.LoaderManager.__hkl_container__.values():
+        per_option.clear()
     reload(lookup)
     lookup = sys.modules[lookup.__name__]
     hickle_hickle = sys.modules.get("hickle.hickle",None)
@@ -164,95 +174,68 @@ def __eq__(self,other):

     def __ne__(self,other):
         return self != other

-
-class MetaClassToDump(type):
-    """
-    Metaclass for ClassToDump allowing to controll which
-    unbound class methods and magic methods are visible to
-    create_pickled_dataset method and which not at the
-    class level
-    """
-
-    # any function listed therein is not defined on class
-    # when called the next time (single shot)
-    hide_special = set()
-
-    def __getattribute__(self,name):
-        if name in MetaClassToDump.hide_special:
-            MetaClassToDump.hide_special.remove(name)
-            raise AttributeError("")
-        return super(MetaClassToDump,self).__getattribute__(name)
-
-
-class ClassToDump(metaclass=MetaClassToDump):
+class ClassToDump():
     """
     Primary class used to test create_pickled_dataset function
     """

     def __init__(self,hallo,welt,with_default=1):
         self._data = hallo,welt,with_default

-    def dump_boundmethod(self):
-        """
-        dummy instance method used to check if instance methods are
-        either rejected or allways stored as pickle string
-        """
-        pass
-
-    @staticmethod
-    def dump_staticmethod():
-        """
-        dummy static method used to check if static methods are allways
-        stored as pickle string
-        """
-        pass
-
-    @classmethod
-    def dump_classmethod(cls):
-        """
-        dummy class method used to check if class methods are allways
-        stored as pickle string
-        """
-        pass
-
     def __eq__(self,other):
         return other.__class__ is self.__class__ and self._data == other._data

     def __ne__(self,other):
         return self != other

-    def __getattribute__(self,name):
-        # ensure that methods which are hidden by metaclass are also not
-        # accessible from class instance
-        if name in MetaClassToDump.hide_special:
-            raise AttributeError("")
-        return super(ClassToDump,self).__getattribute__(name)
-
-    def __getstate__(self):
-        # returns the state of this class when asked by copy protocol handler
-        return self.__dict__
-
-    def __setstate__(self,state):
-
-        # set the state from the passed state description
-        self.__dict__.update(state)
-
-    # controls whether the setstate method is reported as
-    # sixth element of tuple returned by __reduce_ex__ or
-    # __reduce__ function or not
-    extern_setstate = False
+class ClassToDumpCompact(ClassToDump):
+    """
+    Class which may be handled by the 'compact_expand' loader
+    """
+
+    def __compact__(self):
+        return self._data

-    def __reduce_ex__(self,proto = pickle.DEFAULT_PROTOCOL):
-        state = super(ClassToDump,self).__reduce_ex__(proto)
-        if len(state) > 5 or not ClassToDump.extern_setstate:
-            return state
-        return (*state,*( (None,) * ( 5 - len(state)) ),ClassToDump.__setstate__)
+    def __expand__(self,compact):
+        self._data = compact

+class ClassToDumpCompactOff(ClassToDump):
+    """
+    Class which enforces that any instance is pickled independent of
+    whether the 'compact_expand' loader was selected for the
+    hickle.dump call or not
+    """
+
+    def __compact__(self):
+        return None

-    def __reduce__(self):
-        state = super(ClassToDump,self).__reduce__()
-        if len(state) > 5 or not ClassToDump.extern_setstate:
-            return state
-        return (*state,*( (None,) * ( 5 - len(state)) ),ClassToDump.__setstate__)
+class ClassToDumpCompactStrange(ClassToDump):
+    """
+    Class which does not properly implement the '__compact__' and
+    '__expand__' methods recommended by the compact expand protocol
+    """
+
+    def __compact__(self):
+        return self._data
+
+class ClassToDumpCompactStrange2(ClassToDump):
+    """
+    Another class which does not properly implement the '__compact__'
+    and '__expand__' methods recommended by the compact expand protocol
+    """
+
+    def __compact__(self):
+        return 42
+
+class ClassToDumpCompactDataset(ClassToDump):
+    """
+    Class which is to be represented by a h5py.Dataset
+    instead of a h5py.Group in its compacted form
+    """
+
+    def __compact__(self):
+        return "{}|{}|{}".format(*self._data)

+    def __expand__(self,compact):
+        self._data = compact.split("|")
+        self._data[2] = int(self._data[2])
+        self._data = (*self._data,)

 class SimpleClass():
     """
@@ -290,7 +273,23 @@ def function_to_dump(hallo,welt,with_default=1):
     """
     return hallo,welt,with_default

-def test_register_class(loader_table):
+def test_AttemptRecoverCustom_classes(h5_data):
+    recovered_group = lookup.RecoveredGroup({'hello':1},attrs={'world':2,'type':42})
+    assert recovered_group == {'hello':1} and recovered_group.attrs == {'world':2}
+    array_to_recover = np.random.random_sample([4,2])
+    dataset_to_recover = h5_data.create_dataset('to_recover',data=array_to_recover)
+    dataset_to_recover.attrs['world'] = 2
+    dataset_to_recover.attrs['type'] = 42
+    recovered_dataset = lookup.RecoveredDataset(dataset_to_recover[()],dtype=dataset_to_recover.dtype,attrs=dataset_to_recover.attrs)
+    assert np.allclose(recovered_dataset,array_to_recover)
+    assert recovered_dataset.dtype == array_to_recover.dtype
+    assert recovered_dataset.attrs == {'world':2}
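The compact classes above exercise a `__compact__`/`__expand__` pair. For reference, a well-behaved implementation under that protocol might look like this hypothetical class, modeled on ClassToDumpCompactDataset:

class Payload:
    def __init__(self, a, b, n):
        self._data = (a, b, n)

    def __compact__(self):
        # return a simpler, hicklable stand-in for self
        return "{}|{}|{}".format(*self._data)

    def __expand__(self, compact):
        # rebuild the original state from the compacted form on load
        a, b, n = compact.split("|")
        self._data = (a, b, int(n))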

+def test_LoaderManager_register_class(loader_table):
     """
     tests the register_class method
     """

     # try to register dataset only loader specified by loader_table
     # and retrieve its contents from types_dict and hkl_types_dict
     loader_spec = loader_table[0]
-    lookup.register_class(*loader_spec)
-    assert lookup.types_dict[loader_spec[0]] == (*loader_spec[2:0:-1],loader_spec[5])
-    assert lookup.hkl_types_dict[loader_spec[1]] == loader_spec[3]
+    lookup.LoaderManager.register_class(*loader_spec)
+    assert lookup.LoaderManager.__py_types__[None][loader_spec[0]] == (*loader_spec[2:0:-1],loader_spec[5])
+    assert lookup.LoaderManager.__hkl_functions__[None][loader_spec[1]] == loader_spec[3]
     with pytest.raises(KeyError):
-        lookup.hkl_container_dict[loader_spec[1]] is None
+        lookup.LoaderManager.__hkl_container__[None][loader_spec[1]] is None

     # try to register PyContainer only loader specified by loader_table
     # and retrive its contents from types_dict and hkl_contianer_dict
     loader_spec = loader_table[1]
-    lookup.register_class(*loader_spec)
-    assert lookup.types_dict[loader_spec[0]] == (*loader_spec[2:0:-1],loader_spec[5])
+    lookup.LoaderManager.register_class(*loader_spec)
+    assert lookup.LoaderManager.__py_types__[None][loader_spec[0]] == (*loader_spec[2:0:-1],loader_spec[5])
     with pytest.raises(KeyError):
-        lookup.hkl_types_dict[loader_spec[1]] is None
-    assert lookup.hkl_container_dict[loader_spec[1]] == loader_spec[4]
+        lookup.LoaderManager.__hkl_functions__[None][loader_spec[1]] is None
+    assert lookup.LoaderManager.__hkl_container__[None][loader_spec[1]] == loader_spec[4]

     # try to register container without dump_function specified by
     # loader table and try to retrive load_function and PyContainer from
     # hkl_types_dict and hkl_container_dict
     loader_spec = loader_table[2]
-    lookup.register_class(*loader_spec)
+    lookup.LoaderManager.register_class(*loader_spec)
     with pytest.raises(KeyError):
-        lookup.types_dict[loader_spec[0]][1] == loader_spec[1]
-    assert lookup.hkl_types_dict[loader_spec[1]] == loader_spec[3]
-    assert lookup.hkl_container_dict[loader_spec[1]] == loader_spec[4]
+        lookup.LoaderManager.__py_types__[None][loader_spec[0]][1] == loader_spec[1]
+    assert lookup.LoaderManager.__hkl_functions__[None][loader_spec[1]] == loader_spec[3]
+    assert lookup.LoaderManager.__hkl_container__[None][loader_spec[1]] == loader_spec[4]

     # try to register loader shadowing loader preset by hickle core
     # defined by external loader module
     loader_spec = loader_table[3]
     with pytest.raises(TypeError,match = r"loader\s+for\s+'\w+'\s+type\s+managed\s+by\s+hickle\s+only"):
-        lookup.register_class(*loader_spec)
+        lookup.LoaderManager.register_class(*loader_spec)
     loader_spec = loader_table[4]

     # try to register loader shadowing loader preset by hickle core
     # defined by hickle loaders module
     with pytest.raises(TypeError,match = r"loader\s+for\s+'\w+'\s+type\s+managed\s+by\s+hickle\s+core\s+only"):
-        lookup.register_class(*loader_spec)
+        lookup.LoaderManager.register_class(*loader_spec)

     # simulate registering loader preset by hickle core
     loader_spec = loader_table[5]
-    lookup.register_class(*loader_spec)
+    lookup.LoaderManager.register_class(*loader_spec)
+    loader_spec = loader_table[0]
+    with pytest.raises(ValueError):
+        lookup.LoaderManager.register_class(loader_spec[0],b'!node-reference!',*loader_spec[2:],'custom')
+    with pytest.raises(lookup.LookupError):
+        lookup.LoaderManager.register_class(*loader_spec,'mine')

-def test_register_class_exclude(loader_table):
+def test_LoaderManager_register_class_exclude(loader_table):
     """
     test registr class exclude function
     """

     # try to disable loading of loader preset by hickle core
     base_type = loader_table[5][1]
-    lookup.register_class(*loader_table[2])
-    lookup.register_class(*loader_table[5])
+    lookup.LoaderManager.register_class(*loader_table[2])
+    lookup.LoaderManager.register_class(*loader_table[5])
     with pytest.raises(ValueError,match = r"excluding\s+'.+'\s+base_type\s+managed\s+by\s+hickle\s+core\s+not\s+possible"):
-        lookup.register_class_exclude(base_type)
+        lookup.LoaderManager.register_class_exclude(base_type)

     # disable any of the other loaders
     base_type = loader_table[2][1]
-    lookup.register_class_exclude(base_type)
+    lookup.LoaderManager.register_class_exclude(base_type)
+    with pytest.raises(lookup.LookupError):
+        lookup.LoaderManager.register_class_exclude(base_type,'compact')

@@ -365,6 +371,45 @@
 def patch_importlib_util_find_spec(name,package=None):
""" return find_spec("hickle.tests." + name.replace('.','_',1),package) +def patch_importlib_util_find_spec_no_load_builtins(name,package=None): + """ + function used to temporarily redirect search for laoders + to hickle_loader directory in test directory for testing + loading of new loaders + """ + if name in {'hickle.loaders.load_builtins'}: + return None + return find_spec("hickle.tests." + name.replace('.','_',1),package) + +def patch_importlib_util_spec_from_tests_loader(name, loader, *, origin=None, is_package=None): + """ + function used to temporarily redirect search for laoders + to hickle_loader directory in test directory for testing + loading of new loaders + """ + name = name.replace('.','_',1) + myloader = copy(sys.modules['hickle.tests'].__loader__) + myloader.name = "hickle.tests." + name + myloader.path = os.path.join(os.path.dirname(myloader.path),'{}.py'.format(name)) + return spec_from_loader(myloader.name,myloader,origin=origin,is_package=is_package) + +def patch_importlib_util_spec_from_loader(name, loader, *, origin=None, is_package=None): + """ + function used to temporarily redirect search for laoders + to hickle_loader directory in test directory for testing + loading of new loaders + """ + return spec_from_loader("hickle.tests." + name.replace('.','_',1),loader,origin=origin,is_package=is_package) + +def patch_importlib_util_spec_from_file_location(name, location, *, loader=None, submodule_search_locations=None): + """ + function used to temporarily redirect search for laoders + to hickle_loader directory in test directory for testing + loading of new loaders + """ + return spec_from_file_location("hickle.tests." + name.replace('.','_',1),location,loader=loader,submodule_search_locations =submodule_search_locations) + + def patch_importlib_util_find_no_spec(name,package=None): """ function used to simulate situation where no appropriate loader @@ -372,10 +417,115 @@ def patch_importlib_util_find_no_spec(name,package=None): """ return None +def patch_importlib_util_no_spec_from_loader(name, loader, *, origin=None, is_package=None): + """ + function used to simulate situation where no appropriate loader + could be found for object + """ + return None + +def patch_importlib_util_no_spec_from_file_location(name, location, *, loader=None, submodule_search_locations=None): + """ + function used to simulate situation where no appropriate loader + could be found for object + """ + return None + +def test_LoaderManager(loader_table,h5_data): + """ + tests LoaderManager constructor + """ + manager = lookup.LoaderManager(h5_data,False) + assert isinstance(manager.types_dict,collections.ChainMap) + assert manager.types_dict.maps[0] is lookup.LoaderManager.__py_types__[None] + assert isinstance(manager.hkl_types_dict,collections.ChainMap) + assert manager.hkl_types_dict.maps[0] is lookup.LoaderManager.__hkl_functions__[None] + assert isinstance(manager.hkl_container_dict,collections.ChainMap) + assert manager.hkl_container_dict.maps[0] is lookup.LoaderManager.__hkl_container__[None] + assert manager._mro is type.mro + assert manager._file.id == h5_data.file.id + manager = lookup.LoaderManager(h5_data,True) + assert manager.types_dict.maps[0] is lookup.LoaderManager.__py_types__['hickle-4.0'] + assert manager.types_dict.maps[1] is lookup.LoaderManager.__py_types__[None] + assert manager.hkl_types_dict.maps[0] is lookup.LoaderManager.__hkl_functions__['hickle-4.0'] + assert manager.hkl_types_dict.maps[1] is lookup.LoaderManager.__hkl_functions__[None] + assert 
manager.hkl_container_dict.maps[0] is lookup.LoaderManager.__hkl_container__['hickle-4.0'] + assert manager.hkl_container_dict.maps[1] is lookup.LoaderManager.__hkl_container__[None] + assert manager._mro is lookup.type_legacy_mro + assert manager._file.id == h5_data.file.id + + ###### ammend ##### + manager = lookup.LoaderManager(h5_data,False,{'custom':True}) + assert manager.types_dict.maps[0] is lookup.LoaderManager.__py_types__['custom'] + assert manager.types_dict.maps[1] is lookup.LoaderManager.__py_types__[None] + assert manager.hkl_types_dict.maps[0] is lookup.LoaderManager.__hkl_functions__['custom'] + assert manager.hkl_types_dict.maps[1] is lookup.LoaderManager.__hkl_functions__[None] + assert manager.hkl_container_dict.maps[0] is lookup.LoaderManager.__hkl_container__['custom'] + assert manager.hkl_container_dict.maps[1] is lookup.LoaderManager.__hkl_container__[None] + assert manager._file.id == h5_data.file.id + assert h5_data.attrs.get('{}CUSTOM'.format(attribute_prefix),None) + manager = lookup.LoaderManager(h5_data,False,None) + assert manager.types_dict.maps[0] is lookup.LoaderManager.__py_types__['custom'] + assert manager.types_dict.maps[1] is lookup.LoaderManager.__py_types__[None] + assert manager.hkl_types_dict.maps[0] is lookup.LoaderManager.__hkl_functions__['custom'] + assert manager.hkl_types_dict.maps[1] is lookup.LoaderManager.__hkl_functions__[None] + assert manager.hkl_container_dict.maps[0] is lookup.LoaderManager.__hkl_container__['custom'] + assert manager.hkl_container_dict.maps[1] is lookup.LoaderManager.__hkl_container__[None] + assert manager._file.id == h5_data.file.id + h5_data.attrs.pop('{}CUSTOM'.format(attribute_prefix),None) + manager = lookup.LoaderManager(h5_data,False,{'custom':False}) + assert manager.types_dict.maps[0] is lookup.LoaderManager.__py_types__[None] + assert manager.hkl_types_dict.maps[0] is lookup.LoaderManager.__hkl_functions__[None] + assert manager.hkl_container_dict.maps[0] is lookup.LoaderManager.__hkl_container__[None] + assert h5_data.attrs.get('{}CUSTOM'.format(attribute_prefix),h5_data) is h5_data + + with pytest.raises(lookup.LookupError): + manager = lookup.LoaderManager(h5_data,False,{'compact':True}) + + +def test_LoaderManager_drop_manager(h5_data): + """ + test static LoaderManager._drop_table method + """ + loader = lookup.LoaderManager(h5_data) + lookup.LoaderManager.__managers__[h5_data.file.id] = (loader,) + some_other_file = h5py.File('someother.hdf5','w') + some_other_root = some_other_file.create_group('root') + lookup.LoaderManager._drop_manager(some_other_root.file.id) + lookup.LoaderManager.__managers__[some_other_file.file.id] = (lookup.LoaderManager(some_other_root),) + assert lookup.LoaderManager.__managers__.get(h5_data.file.id,None) == (loader,) + lookup.LoaderManager._drop_manager(h5_data.file.id) + assert lookup.LoaderManager.__managers__.get(h5_data.file.id,None) is None + lookup.LoaderManager._drop_manager(some_other_root.file.id) + assert not lookup.LoaderManager.__managers__ + some_other_file.close() + +def test_LoaderManager_create_manager(h5_data): + """ + test public static LoaderManager.create_manager function + """ + second_tree = h5_data.file.create_group('seondary_root') + loader = lookup.LoaderManager.create_manager(h5_data) + assert lookup.LoaderManager.__managers__[h5_data.file.id][0] is loader + with pytest.raises(lookup.LookupError): + second_table = lookup.LoaderManager.create_manager(second_tree) + lookup.LoaderManager._drop_manager(h5_data.file.id) + +def 
test_LoaderManager_context(h5_data): + """ + test use of LoaderManager as context manager + """ + with lookup.LoaderManager.create_manager(h5_data) as loader: + assert lookup.LoaderManager.__managers__[h5_data.file.id][0] is loader + assert loader._file is None + with pytest.raises(RuntimeError): + with loader as loader2: + pass + loader.__exit__(None,None,None) -def test_load_loader(loader_table,monkeypatch): +def test_LoaderManager_load_loader(loader_table,h5_data,monkeypatch): """ - test load_loader function + test LoaderManager.load_loader method """ # some data to check loader for @@ -383,76 +533,123 @@ def test_load_loader(loader_table,monkeypatch): py_object = dict() loader_name = "hickle.loaders.load_builtins" with monkeypatch.context() as moc_import_lib: - - # hide loader from hickle.lookup.loaded_loaders and check that - # fallback loader for python object is returned - moc_import_lib.setattr("importlib.util.find_spec",patch_importlib_util_find_no_spec) - moc_import_lib.setattr("hickle.lookup.find_spec",patch_importlib_util_find_no_spec) - moc_import_lib.delitem(sys.modules,"hickle.loaders.load_builtins",raising=False) - py_obj_type,nopickleloader = lookup.load_loader(py_object.__class__) - assert py_obj_type is dict and nopickleloader == (lookup.create_pickled_dataset,b'pickle',True) - - # redirect load_builtins loader to tests/hickle_loader path - moc_import_lib.setattr("importlib.util.find_spec",patch_importlib_util_find_spec) - moc_import_lib.setattr("hickle.lookup.find_spec",patch_importlib_util_find_spec) - - # preload dataset only loader and check that it can be resolved directly - loader_spec = loader_table[0] - lookup.register_class(*loader_spec) - assert lookup.load_loader((12).__class__) == (loader_spec[0],(*loader_spec[2:0:-1],loader_spec[5])) - - # try to find appropriate loader for dict object, a moc of this - # loader should be provided by hickle/tests/hickle_loaders/load_builtins - # module ensure that this module is the one found by load_loader function - import hickle.tests.hickle_loaders.load_builtins as load_builtins - moc_import_lib.setitem(sys.modules,loader_name,load_builtins) - assert lookup.load_loader(py_object.__class__) == (dict,(load_builtins.create_package_test,b'dict',True)) - - # remove loader again and undo redirection again. 
dict should now be - # processed by create_pickled_dataset - moc_import_lib.delitem(sys.modules,loader_name) - del lookup.types_dict[dict] - py_obj_type,nopickleloader = lookup.load_loader(py_object.__class__) - assert py_obj_type is dict and nopickleloader == (lookup.create_pickled_dataset,b'pickle',True) - - # check that load_loader prevenst redefinition of loaders to be predefined by hickle core - with pytest.raises( - RuntimeError, - match = r"objects\s+defined\s+by\s+hickle\s+core\s+must\s+be" - r"\s+registerd\s+before\s+first\s+dump\s+or\s+load" - ): - py_obj_type,nopickleloader = lookup.load_loader(ToBeInLoadersOrNotToBe) - monkeypatch.setattr(ToBeInLoadersOrNotToBe,'__module__','hickle.loaders') - - # check that load_loaders issues drop warning upon loader definitions for - # dummy objects defined within hickle package but outsied loaders modules - with pytest.warns( - RuntimeWarning, - match = r"ignoring\s+'.+'\s+dummy\s+type\s+not\s+defined\s+by\s+loader\s+module" - ): - py_obj_type,nopickleloader = lookup.load_loader(ToBeInLoadersOrNotToBe) - assert py_obj_type is ToBeInLoadersOrNotToBe - assert nopickleloader == (lookup.create_pickled_dataset,b'pickle',True) - - # check that loader definitions for dummy objets defined by loaders work as expected - # by loader module - monkeypatch.setattr(ToBeInLoadersOrNotToBe,'__module__',loader_name) - py_obj_type,(create_dataset,base_type,memoise) = lookup.load_loader(ToBeInLoadersOrNotToBe) - assert py_obj_type is ToBeInLoadersOrNotToBe and base_type == b'NotHicklable' - assert create_dataset is not_dumpable - assert memoise == False - - # remove loader_name from list of loaded loaders and check that loader is loaded anew - # and that values returned for dict object correspond to loader - # provided by freshly loaded loader module - lookup.loaded_loaders.remove(loader_name) - py_obj_type,(create_dataset,base_type,memoise) = lookup.load_loader(py_object.__class__) - load_builtins_moc = sys.modules.get(loader_name,None) - assert load_builtins_moc is not None - loader_spec = load_builtins_moc.class_register[0] - assert py_obj_type is dict and create_dataset is loader_spec[2] - assert base_type is loader_spec[1] - assert memoise == True + with lookup.LoaderManager.create_manager(h5_data) as loader: + + # hide loader from hickle.lookup.loaded_loaders and check that + # fallback loader for python object is returned + moc_import_lib.setattr("importlib.util.find_spec",patch_importlib_util_find_no_spec) + moc_import_lib.setattr("hickle.lookup.find_spec",patch_importlib_util_find_no_spec) + moc_import_lib.setattr("importlib.util.spec_from_loader",patch_importlib_util_no_spec_from_loader) + moc_import_lib.setattr("hickle.lookup.spec_from_loader",patch_importlib_util_no_spec_from_loader) + moc_import_lib.setattr("importlib.util.spec_from_file_location",patch_importlib_util_no_spec_from_file_location) + moc_import_lib.setattr("hickle.lookup.spec_from_file_location",patch_importlib_util_no_spec_from_file_location) + moc_import_lib.delitem(sys.modules,"hickle.loaders.load_builtins",raising=False) + py_obj_type,nopickleloader = loader.load_loader(py_object.__class__) + assert py_obj_type is dict and nopickleloader == (lookup.create_pickled_dataset,b'pickle',True) + assert py_obj_type is dict and nopickleloader == (lookup.create_pickled_dataset,b'pickle',True) + + lookup._custom_loader_enabled_builtins[py_obj_type.__class__.__module__] = ('','') + py_obj_type,nopickleloader = loader.load_loader(py_object.__class__) + assert py_obj_type is dict and 
nopickleloader == (lookup.create_pickled_dataset,b'pickle',True) + + backup_builtins = sys.modules['builtins'] + moc_import_lib.delitem(sys.modules,'builtins') + # TODO when warning is added run check for warning + py_obj_type,nopickleloader = loader.load_loader(py_object.__class__) + assert py_obj_type is dict and nopickleloader == (lookup.create_pickled_dataset,b'pickle',True) + moc_import_lib.setitem(sys.modules,'builtins',backup_builtins) + + + # redirect load_builtins loader to tests/hickle_loader path + moc_import_lib.setattr("importlib.util.spec_from_file_location",patch_importlib_util_spec_from_file_location) + moc_import_lib.setattr("hickle.lookup.spec_from_file_location",patch_importlib_util_spec_from_file_location) + #py_obj_type,nopickleloader = loader.load_loader(py_object.__class__) + #assert py_obj_type is dict and nopickleloader == (lookup.create_pickled_dataset,b'pickle',True) + moc_import_lib.setattr("importlib.util.find_spec",patch_importlib_util_find_spec) + moc_import_lib.setattr("hickle.lookup.find_spec",patch_importlib_util_find_spec) + moc_import_lib.setattr("importlib.util.spec_from_loader",patch_importlib_util_spec_from_loader) + moc_import_lib.setattr("hickle.lookup.spec_from_loader",patch_importlib_util_spec_from_loader) + + # try to find appropriate loader for dict object, a moc of this + # loader should be provided by hickle/tests/hickle_loaders/load_builtins + # module ensure that this module is the one found by load_loader function + import hickle.tests.hickle_loaders.load_builtins as load_builtins + moc_import_lib.setitem(sys.modules,loader_name,load_builtins) + moc_import_lib.setattr("importlib.util.spec_from_loader",patch_importlib_util_spec_from_tests_loader) + moc_import_lib.setattr("hickle.lookup.spec_from_loader",patch_importlib_util_spec_from_tests_loader) + py_obj_type,nopickleloader = loader.load_loader(py_object.__class__) + assert py_obj_type is dict and nopickleloader == (load_builtins.create_package_test,b'dict',True) + + backup_load_builtins = sys.modules.pop('hickle.loaders.load_builtins',None) + backup_py_obj_type = loader.types_dict.pop(dict,None) + backup_loaded_loaders = lookup.LoaderManager.__loaded_loaders__.discard('hickle.loaders.load_builtins') + moc_import_lib.setattr("importlib.util.find_spec",patch_importlib_util_find_spec_no_load_builtins) + moc_import_lib.setattr("hickle.lookup.find_spec",patch_importlib_util_find_spec_no_load_builtins) + py_obj_type,nopickleloader = loader.load_loader(py_object.__class__) + assert py_obj_type is dict + assert nopickleloader == (sys.modules['hickle.loaders.load_builtins'].create_package_test,b'dict',True) + moc_import_lib.setattr("importlib.util.spec_from_loader",patch_importlib_util_spec_from_loader) + moc_import_lib.setattr("hickle.lookup.spec_from_loader",patch_importlib_util_spec_from_loader) + moc_import_lib.setattr("importlib.util.find_spec",patch_importlib_util_find_spec) + moc_import_lib.setattr("hickle.lookup.find_spec",patch_importlib_util_find_spec) + sys.modules['hickle.loaders.load_builtins'] = backup_load_builtins + loader.types_dict[dict] = backup_py_obj_type + lookup._custom_loader_enabled_builtins.pop(py_obj_type.__class__.__module__,None) + + # preload dataset only loader and check that it can be resolved directly + loader_spec = loader_table[0] + lookup.LoaderManager.register_class(*loader_spec) + assert loader.load_loader((12).__class__) == (loader_spec[0],(*loader_spec[2:0:-1],loader_spec[5])) + + # try to find appropriate loader for dict object, a moc of this + # should 
have already been imported above + assert loader.load_loader(py_object.__class__) == (dict,(load_builtins.create_package_test,b'dict',True)) + + # remove loader again and undo redirection again. dict should now be + # processed by create_pickled_dataset + moc_import_lib.delitem(sys.modules,loader_name) + del lookup.LoaderManager.__py_types__[None][dict] + py_obj_type,nopickleloader = loader.load_loader(py_object.__class__) + assert py_obj_type is dict and nopickleloader == (lookup.create_pickled_dataset,b'pickle',True) + + # check that load_loader prevenst redefinition of loaders to be predefined by hickle core + with pytest.raises( + RuntimeError, + match = r"objects\s+defined\s+by\s+hickle\s+core\s+must\s+be" + r"\s+registerd\s+before\s+first\s+dump\s+or\s+load" + ): + py_obj_type,nopickleloader = loader.load_loader(ToBeInLoadersOrNotToBe) + monkeypatch.setattr(ToBeInLoadersOrNotToBe,'__module__','hickle.loaders') + + # check that load_loaders issues drop warning upon loader definitions for + # dummy objects defined within hickle package but outsied loaders modules + with pytest.warns( + RuntimeWarning, + match = r"ignoring\s+'.+'\s+dummy\s+type\s+not\s+defined\s+by\s+loader\s+module" + ): + py_obj_type,nopickleloader = loader.load_loader(ToBeInLoadersOrNotToBe) + assert py_obj_type is ToBeInLoadersOrNotToBe + assert nopickleloader == (lookup.create_pickled_dataset,b'pickle',True) + + # check that loader definitions for dummy objets defined by loaders work as expected + # by loader module + monkeypatch.setattr(ToBeInLoadersOrNotToBe,'__module__',loader_name) + py_obj_type,(create_dataset,base_type,memoise) = loader.load_loader(ToBeInLoadersOrNotToBe) + assert py_obj_type is ToBeInLoadersOrNotToBe and base_type == b'NotHicklable' + assert create_dataset is not_dumpable + assert memoise == False + + # remove loader_name from list of loaded loaders and check that loader is loaded anew + # and that values returned for dict object correspond to loader + # provided by freshly loaded loader module + lookup.LoaderManager.__loaded_loaders__.remove(loader_name) + py_obj_type,(create_dataset,base_type,memoise) = loader.load_loader(py_object.__class__) + load_builtins_moc = sys.modules.get(loader_name,None) + assert load_builtins_moc is not None + loader_spec = load_builtins_moc.class_register[0] + assert py_obj_type is dict and create_dataset is loader_spec[2] + assert base_type is loader_spec[1] + assert memoise == True def test_type_legacy_mro(): """ @@ -479,12 +676,19 @@ def test_create_pickled_dataset(h5_data): # check if create_pickled_dataset issues SerializedWarning for objects which # either do not support copy protocol py_object = ClassToDump('hello',1) + pickled_py_object = pickle.dumps(py_object) data_set_name = "greetings" with pytest.warns(lookup.SerializedWarning,match = r".*type\s+not\s+understood,\s+data\s+is\s+serialized:.*") as warner: h5_node,subitems = lookup.create_pickled_dataset(py_object, h5_data,data_set_name) assert isinstance(h5_node,h5py.Dataset) and not subitems and iter(subitems) - assert bytes(h5_node[()]) == pickle.dumps(py_object) and h5_node.name.split('/')[2] == data_set_name + assert bytes(h5_node[()]) == pickled_py_object and h5_node.name.rsplit('/',1)[-1] == data_set_name assert lookup.load_pickled_data(h5_node,b'pickle',object) == py_object + backup_class_to_dump = globals()['ClassToDump'] + backup_class_to_dump = globals().pop('ClassToDump',None) + recovered = lookup.load_pickled_data(h5_node,b'pickle',object) + assert 
isinstance(recovered,lookup.RecoveredDataset) + assert bytes(recovered) == pickled_py_object + globals()['ClassToDump'] = backup_class_to_dump def test__DictItemContainer(): @@ -577,6 +781,9 @@ def test_ReferenceManager_get_root(h5_data): assert lookup.ReferenceManager.get_root(some_list_item).id == root_group.file.id +class not_a_surviver(): + """does not survive pickle.dumps""" + def test_ReferenceManager(h5_data): """ test for creation of ReferenceManager object (__init__) @@ -606,15 +813,28 @@ def test_ReferenceManager(h5_data): list_base_type = b'list' list_base_type = type_table.create_dataset(list_base_type,shape=None,dtype="S1") list_entry.attrs['base_type'] = list_base_type.ref + + missing_pickle_string = bytearray(pickle.dumps(not_a_surviver)) + missing_np_entry = np.array(missing_pickle_string,copy = False) + missing_np_entry.dtype = 'S1' + missing_entry = type_table.create_dataset(str(len(type_table)),data = missing_np_entry,shape=(1,missing_np_entry.size)) + missing_base_type = b'lost' + missing_base_type = type_table.create_dataset(missing_base_type,shape=None,dtype="S1") + missing_entry.attrs['base_type'] = missing_base_type.ref + hide_not_a_surviver = globals().pop('not_a_surviver',None) reference_manager = lookup.ReferenceManager(h5_data) + globals()['not_a_surviver'] = hide_not_a_surviver assert reference_manager._py_obj_type_link[id(int)] == int_entry - assert reference_manager._py_obj_type_link[int_entry.id] == (id(int),b'int') + assert reference_manager._py_obj_type_link[int_entry.id] == (int,b'int') assert reference_manager._base_type_link[b'int'] == int_base_type assert reference_manager._base_type_link[int_base_type.id] == b'int' assert reference_manager._py_obj_type_link[id(list)] == list_entry - assert reference_manager._py_obj_type_link[list_entry.id] == (id(list),b'list') + assert reference_manager._py_obj_type_link[list_entry.id] == (list,b'list') assert reference_manager._base_type_link[b'list'] == list_base_type assert reference_manager._base_type_link[list_base_type.id] == b'list' + assert reference_manager._base_type_link[b'lost'] == missing_base_type + assert reference_manager._base_type_link[missing_base_type.id] == b'lost' + assert reference_manager._py_obj_type_link[missing_entry.id] == (lookup.AttemptRecoverCustom,'!recover!',b'lost') backup_attr = list_entry.attrs['base_type'] list_entry.attrs.pop('base_type',None) with pytest.raises(lookup.ReferenceError): @@ -628,9 +848,10 @@ def test_ReferenceManager(h5_data): with pytest.raises(lookup.ReferenceError): reference_manager = lookup.ReferenceManager(h5_data) list_entry.attrs['base_type']=backup_attr + old_hickle_file_root = h5_data.file.create_group('old_root') h5_data.file.flush() - base_name,ext = h5_data.file.filename.split('.') + base_name,ext = h5_data.file.filename.rsplit('.',1) file_name = "{}_ro.{}".format(base_name,ext) shutil.copyfile(h5_data.file.filename,file_name) data_name = h5_data.name @@ -643,8 +864,9 @@ def test_ReferenceManager(h5_data): assert overlay_file.mode == 'r+' and overlay_file.driver == 'core' assert overlay_file.id != read_only_handle.id reference_manager = lookup.ReferenceManager(h5_read_data) + + read_only_handle.close() - class SubReferenceManager(lookup.ReferenceManager): __managers__ = () assert SubReferenceManager.__managers__ is lookup.ReferenceManager.__managers__ @@ -660,6 +882,8 @@ class OtherManager(lookup.BaseManager): pass + + def test_ReferenceManager_drop_manager(h5_data): """ test static ReferenceManager._drop_table method @@ -677,7 +901,6 @@ def 
test_ReferenceManager_drop_manager(h5_data): assert not lookup.ReferenceManager.__managers__ some_other_file.close() - def test_ReferenceManager_create_manager(h5_data): """ test public static ReferenceManager.create_manager function @@ -685,8 +908,9 @@ def test_ReferenceManager_create_manager(h5_data): second_tree = h5_data.file.create_group('seondary_root') h5_data_table = lookup.ReferenceManager.create_manager(h5_data) assert lookup.ReferenceManager.__managers__[h5_data.file.id][0] is h5_data_table - with pytest.raises(lookup.ReferenceError): + with pytest.raises(lookup.LookupError): second_table = lookup.ReferenceManager.create_manager(second_tree) + lookup.ReferenceManager._drop_manager(h5_data.file.id) def test_ReferenceManager_context(h5_data): """ @@ -701,7 +925,7 @@ def test_ReferenceManager_context(h5_data): memo.__exit__(None,None,None) old_hickle_file_root = h5_data.file.create_group('old_root') h5_data.file.flush() - base_name,ext = h5_data.file.filename.split('.') + base_name,ext = h5_data.file.filename.rsplit('.',1) file_name = "{}_ro.{}".format(base_name,ext) shutil.copyfile(h5_data.file.filename,file_name) data_name = old_hickle_file_root.name @@ -730,7 +954,25 @@ def test_ReferenceManager_store_type(h5_data): type_table_entry = h5_data.file[h_node.attrs['type']] assert pickle.loads(type_table_entry[()]) is list assert isinstance(type_table_entry.attrs['base_type'],h5py.Reference) - assert h5_data.file[type_table_entry.attrs['base_type']].name.split('/')[-1].encode('ascii') == b'list' + assert h5_data.file[type_table_entry.attrs['base_type']].name.rsplit('/',1)[-1].encode('ascii') == b'list' + +def test_ReferenceManager_get_manager(h5_data): + h_node = h5_data.create_group('some_list') + item_data = np.array(memoryview(b'hallo welt lore grueszet dich ipsum aus der lore von ipsum gelort in ipsum'),copy=False) + item_data.dtype = 'S1' + h_item = h_node.create_dataset('0',data=item_data,shape=(1,item_data.size)) + with lookup.ReferenceManager.create_manager(h5_data) as memo: + memo.store_type(h_node,list,b'list') + memo.store_type(h_item,bytes,b'bytes') + assert lookup.ReferenceManager.get_manager(h_item) == memo + + backup_manager = lookup.ReferenceManager.__managers__.pop(h5_data.file.id,None) + assert backup_manager is not None + with pytest.raises(lookup.ReferenceError): + manager = lookup.ReferenceManager.get_manager(h_item) + lookup.ReferenceManager.__managers__[h5_data.file.id] = backup_manager + with pytest.raises(lookup.ReferenceError): + manager = lookup.ReferenceManager.get_manager(h_item) def test_ReferenceManager_resolve_type(h5_data): """ @@ -752,6 +994,7 @@ def test_ReferenceManager_resolve_type(h5_data): new_style_typed = h5_data.create_dataset('new_style_typed',data = 12) stale_new_style = h5_data.create_dataset('stale_new_style',data = 12) new_style_typed_no_link = h5_data.create_dataset('new_style_typed_no_link',data = 12.5) + has_not_recoverable_type = h5_data.create_dataset('no_recoverable_type',data = 42.56) with lookup.ReferenceManager.create_manager(h5_data) as memo: with pytest.raises(lookup.ReferenceError): memo.resolve_type(invalid_pickle_and_ref) @@ -789,8 +1032,21 @@ def test_ReferenceManager_resolve_type(h5_data): del memo._base_type_link[b'float'] float_entry.attrs['base_type'] = float_base assert memo.resolve_type(new_style_typed_no_link) == (float,b'float',False) - assert memo.resolve_type(new_style_typed_no_link) + memo.store_type(has_not_recoverable_type,not_a_surviver,b'lost') + del 
memo._py_obj_type_link[memo._py_obj_type_link[id(not_a_surviver)].id] + del memo._py_obj_type_link[id(not_a_surviver)] + hide_not_a_surviver = globals().pop('not_a_surviver',None) + assert memo.resolve_type(has_not_recoverable_type) == (lookup.AttemptRecoverCustom,b'!recover!',False) + assert memo.resolve_type(has_not_recoverable_type,base_type_type=2) == (lookup.AttemptRecoverCustom,b'lost',False) + globals()['not_a_surviver'] = hide_not_a_surviver + has_not_recoverable_type.attrs['type'] = np.array(pickle.dumps(not_a_surviver)) + has_not_recoverable_type.attrs['base_type'] = b'lost' + hide_not_a_surviver = globals().pop('not_a_surviver',None) + assert memo.resolve_type(has_not_recoverable_type) == (lookup.AttemptRecoverCustom,b'!recover!',False) + assert memo.resolve_type(has_not_recoverable_type,base_type_type=2) == (lookup.AttemptRecoverCustom,b'lost',False) + globals()['not_a_surviver'] = hide_not_a_surviver + def test_ExpandReferenceContainer(h5_data): """ @@ -814,18 +1070,80 @@ def test_ExpandReferenceContainer(h5_data): sub_container.append(name,content,subitem.attrs) +def test_recover_custom_data(h5_data): + array_to_recover = np.random.random_sample([4,2]) + with lookup.ReferenceManager.create_manager(h5_data) as memo: + dataset_to_recover = h5_data.create_dataset('to_recover',data=array_to_recover) + dataset_to_recover.attrs['world'] = 2 + memo.store_type(dataset_to_recover,ClassToDump,b'myclass') + group_to_recover = h5_data.create_group('need_recover') + memo.store_type(group_to_recover,ClassToDump,b'myclass') + backup_class_to_dump = globals().pop('ClassToDump',None) + memo._py_obj_type_link.pop(id('ClassToDump'),None) + memo._base_type_link.pop(b'myclass') + type_entry = memo._py_obj_type_table[dataset_to_recover.attrs['type']] + memo._py_obj_type_link.pop(type_entry.id,None) + py_obj_type,base_type,is_group = memo.resolve_type(dataset_to_recover) + assert issubclass(py_obj_type,lookup.AttemptRecoverCustom) and base_type == b'!recover!' + recovered = lookup.recover_custom_dataset(dataset_to_recover,base_type,py_obj_type) + assert recovered.dtype == array_to_recover.dtype and np.all(recovered == array_to_recover) + assert recovered.attrs == {'base_type':b'myclass','world':2} + assert not is_group + type_entry = memo._py_obj_type_table[group_to_recover.attrs['type']] + memo._py_obj_type_link.pop(type_entry.id,None) + some_int=group_to_recover.create_dataset('some_int',data=42) + some_float=group_to_recover.create_dataset('some_float',data=42.0) + group_to_recover.attrs['so'] = 'long' + group_to_recover.attrs['and'] = 'thanks' + some_float.attrs['for'] = 'all' + some_float.attrs['the'] = 'fish' + py_obj_type,base_type,is_group = memo.resolve_type(group_to_recover) + assert issubclass(py_obj_type,lookup.AttemptRecoverCustom) and base_type == b'!recover!' 
+ assert is_group + recover_container = lookup.RecoverGroupContainer(group_to_recover.attrs,base_type,py_obj_type) + for name,item in recover_container.filter(group_to_recover): + recover_container.append(name,item[()],item.attrs) + recover_container.append('some_other',recovered,recovered.attrs) + recovered_group = recover_container.convert() + assert isinstance(recovered_group,dict) + assert some_float[()] == recovered_group['some_float'][0] and some_float.attrs == recovered_group['some_float'][1] + assert some_int[()] == recovered_group['some_int'][0] and some_int.attrs == recovered_group['some_int'][1] + assert recovered_group['some_other'] is recovered + assert recovered_group.attrs['base_type'] == memo.resolve_type(group_to_recover,base_type_type=2)[1] + assert len(recovered_group.attrs) == 3 + assert recovered_group.attrs['so'] == 'long' and recovered_group.attrs['and'] == 'thanks' + globals()['ClassToDump'] = backup_class_to_dump + # %% MAIN SCRIPT if __name__ == "__main__": from _pytest.monkeypatch import monkeypatch from _pytest.fixtures import FixtureRequest + for h5_root in h5_data(FixtureRequest(test_create_pickled_dataset)): + test_AttemptRecoverCustom_classes(h5_data) for table in loader_table(): - test_register_class(table) + test_LoaderManager_register_class(table) for table in loader_table(): - test_register_class_exclude(table) - for monkey in monkeypatch(): - for table in loader_table(): - test_load_loader(table,monkey) + test_LoaderManager_register_class_exclude(table) + for table,h5_root in ( + (tab,root) + for tab in loader_table() + for root in h5_data(FixtureRequest(test_LoaderManager)) + ): + test_LoaderManager(table,h5_root) + for h5_root in h5_data(FixtureRequest(test_LoaderManager_drop_manager)): + test_LoaderManager_drop_manager(h5_root) + for h5_root in h5_data(FixtureRequest(test_LoaderManager_create_manager)): + test_LoaderManager_create_manager(h5_root) + for h5_root in h5_data(FixtureRequest(test_LoaderManager_context)): + test_LoaderManager_context(h5_root) + for table,h5_root,monkey in ( + (tab,root,mpatch) + for tab in loader_table() + for root in h5_data(FixtureRequest(test_LoaderManager_load_loader)) + for mpatch in monkeypatch() + ): + test_LoaderManager_load_loader(table,h5_root,monkey) test_type_legacy_mro() for h5_root in h5_data(FixtureRequest(test_create_pickled_dataset)): test_create_pickled_dataset(h5_root) @@ -837,18 +1155,22 @@ def test_ExpandReferenceContainer(h5_data): test_ReferenceManager_get_root(h5_root) for h5_root in h5_data(FixtureRequest(test_ReferenceManager)): test_ReferenceManager(h5_root) - for h5_root in h5_data(FixtureRequest(test_ReferenceManager_drop_table)): - test_ReferenceManager_drop_table(h5_root) + for h5_root in h5_data(FixtureRequest(test_ReferenceManager_drop_manager)): + test_ReferenceManager_drop_manager(h5_root) for h5_root in h5_data(FixtureRequest(test_ReferenceManager_create_manager)): test_ReferenceManager_create_manager(h5_root) for h5_root in h5_data(FixtureRequest(test_ReferenceManager_context)): test_ReferenceManager_context(h5_root) + for h5_root in h5_data(FixtureRequest(test_ReferenceManager_get_manager)): + test_ReferenceManager_get_manager(h5_root) for h5_root in h5_data(FixtureRequest(test_ReferenceManager_store_type)): test_ReferenceManager_store_type(h5_root) for h5_root in h5_data(FixtureRequest(test_ReferenceManager_resolve_type)): test_ReferenceManager_resolve_type(h5_root) for h5_root in h5_data(FixtureRequest(test_ExpandReferenceContainer)): test_ExpandReferenceContainer(h5_root) + for 
h5_root in h5_data(FixtureRequest(test_ExpandReferenceContainer)): + test_recover_custom_data(h5_data) diff --git a/hickle/tests/test_99_hickle_core.py b/hickle/tests/test_99_hickle_core.py index 2bd504ff..6263ec7f 100644 --- a/hickle/tests/test_99_hickle_core.py +++ b/hickle/tests/test_99_hickle_core.py @@ -101,66 +101,66 @@ def test_recursive_dump(h5_data): # sets appropriate values for 'type' and 'base_type' attributes data = simple_list = [1,2,3,4] with lookup.ReferenceManager.create_manager(h5_data) as memo: - hickle._dump(data, h5_data, "simple_list",memo) - dumped_data = h5_data["simple_list"] - assert memo.resolve_type(dumped_data) == (data.__class__,b'list',False) - assert np.all(dumped_data[()] == simple_list) - - # check that dump function properly creats a group representing - # a dictionary and its keys and values and sets appropriate values - # for 'type', 'base_type' and 'key_base_type' attributes - data = { - '12':12, - (1,2,3):'hallo' - } - hickle._dump(data, h5_data, "some_dict",memo) - dumped_data = h5_data["some_dict"] - assert memo.resolve_type(dumped_data) == (data.__class__,b'dict',True) - - # check that the name of the resulting dataset for the first dict item - # resembles double quouted string key and 'type', 'base_type 'key_base_type' - # attributes the resulting dataset are set accordingly - first_item = dumped_data['"12"'] - assert first_item[()] == 12 and first_item.attrs['key_base_type'] == b'str' - assert memo.resolve_type(first_item) == (data['12'].__class__,b'int',False) - #assert first_item.attrs['base_type'] == b'int' - #assert first_item.attrs['type'] == pickle.dumps(data['12'].__class__) + with lookup.LoaderManager.create_manager(h5_data) as loader: + hickle._dump(data, h5_data, "simple_list",memo,loader) + dumped_data = h5_data["simple_list"] + assert memo.resolve_type(dumped_data) == (data.__class__,b'list',False) + assert np.all(dumped_data[()] == simple_list) - # check that second item is converted into key value pair group, that - # the name of that group reads 'data0' and that 'type', 'base_type' and - # 'key_base_type' attributes are set accordingly - second_item = dumped_data.get("data0",None) - if second_item is None: - second_item = dumped_data["data1"] - assert second_item.attrs['key_base_type'] == b'key_value' - assert memo.resolve_type(second_item) == (tuple,b'tuple',True) - #assert second_item.attrs['type'] == pickle.dumps(tuple) - - # check that content of key value pair group resembles key and value of - # second dict item - key = second_item['data0'] - value = second_item['data1'] - assert np.all(key[()] == (1,2,3)) - # and key.attrs['base_type'] == b'tuple' - assert memo.resolve_type(key) == (tuple,b'tuple',False) - assert bytes(value[()]) == 'hallo'.encode('utf8') - # and value.attrs['base_type'] == b'str' - assert memo.resolve_type(value) == (str,b'str',False) - - # check that objects for which no loader has been registred or for which - # available loader raises NotHicklable exception are handled by - # create_pickled_dataset function - backup_dict_loader = lookup.types_dict[dict] - def fail_create_dict(py_obj,h_group,name,**kwargs): - raise helpers.NotHicklable("test loader shrugg") - lookup.types_dict[dict] = (fail_create_dict,*backup_dict_loader[1:]) - memo_backup = memo.pop(id(data),None) - with pytest.warns(lookup.SerializedWarning): - hickle._dump(data, h5_data, "pickled_dict",memo) - dumped_data = h5_data["pickled_dict"] - lookup.types_dict[dict] = backup_dict_loader - assert bytes(dumped_data[()]) == pickle.dumps(data) - 
memo[id(data)] = memo_backup + # check that dump function properly creats a group representing + # a dictionary and its keys and values and sets appropriate values + # for 'type', 'base_type' and 'key_base_type' attributes + data = { + '12':12, + (1,2,3):'hallo' + } + hickle._dump(data, h5_data, "some_dict",memo,loader) + dumped_data = h5_data["some_dict"] + assert memo.resolve_type(dumped_data) == (data.__class__,b'dict',True) + + # check that the name of the resulting dataset for the first dict item + # resembles double quouted string key and 'type', 'base_type 'key_base_type' + # attributes the resulting dataset are set accordingly + first_item = dumped_data['"12"'] + assert first_item[()] == 12 and first_item.attrs['key_base_type'] == b'str' + assert memo.resolve_type(first_item) == (data['12'].__class__,b'int',False) + #assert first_item.attrs['base_type'] == b'int' + #assert first_item.attrs['type'] == pickle.dumps(data['12'].__class__) + + # check that second item is converted into key value pair group, that + # the name of that group reads 'data0' and that 'type', 'base_type' and + # 'key_base_type' attributes are set accordingly + second_item = dumped_data.get("data0",None) + if second_item is None: + second_item = dumped_data["data1"] + assert second_item.attrs['key_base_type'] == b'key_value' + assert memo.resolve_type(second_item) == (tuple,b'tuple',True) + #assert second_item.attrs['type'] == pickle.dumps(tuple) + + # check that content of key value pair group resembles key and value of + # second dict item + key = second_item['data0'] + value = second_item['data1'] + assert np.all(key[()] == (1,2,3)) + # and key.attrs['base_type'] == b'tuple' + assert memo.resolve_type(key) == (tuple,b'tuple',False) + assert bytes(value[()]) == 'hallo'.encode('utf8') + # and value.attrs['base_type'] == b'str' + assert memo.resolve_type(value) == (str,b'str',False) + + # check that objects for which no loader has been registred or for which + # available loader raises NotHicklable exception are handled by + # create_pickled_dataset function + def fail_create_dict(py_obj,h_group,name,**kwargs): + raise helpers.NotHicklable("test loader shrugg") + loader.types_dict.maps.insert(0,{dict:(fail_create_dict,*loader.types_dict[dict][1:])}) + memo_backup = memo.pop(id(data),None) + with pytest.warns(lookup.SerializedWarning): + hickle._dump(data, h5_data, "pickled_dict",memo,loader) + dumped_data = h5_data["pickled_dict"] + assert bytes(dumped_data[()]) == pickle.dumps(data) + loader.types_dict.maps.pop(0) + memo[id(data)] = memo_backup def test_recursive_load(h5_data): """ @@ -172,35 +172,35 @@ def test_recursive_load(h5_data): data = 42 data_name = "the_answer" with lookup.ReferenceManager.create_manager(h5_data) as memo: - hickle._dump(data, h5_data, data_name,memo) - py_container = hickle.RootContainer(h5_data.attrs,b'hickle_root',hickle.RootContainer) - hickle._load(py_container, data_name, h5_data[data_name],memo) - assert py_container.convert() == data - - # check that dict object is properly restored on load from corresponding group - data = {'question':None,'answer':42} - data_name = "not_formulated" - hickle._dump(data, h5_data, data_name,memo) - py_container = hickle.RootContainer(h5_data.attrs,b'hickle_root',hickle.RootContainer) - hickle._load(py_container, data_name, h5_data[data_name],memo) - assert py_container.convert() == data - + with lookup.LoaderManager.create_manager(h5_data) as loader: + hickle._dump(data, h5_data, data_name,memo,loader) + py_container = 
hickle.RootContainer(h5_data.attrs,b'hickle_root',hickle.RootContainer) + hickle._load(py_container, data_name, h5_data[data_name],memo,loader) + assert py_container.convert() == data + + # check that dict object is properly restored on load from corresponding group + data = {'question':None,'answer':42} + data_name = "not_formulated" + hickle._dump(data, h5_data, data_name,memo,loader) + py_container = hickle.RootContainer(h5_data.attrs,b'hickle_root',hickle.RootContainer) + hickle._load(py_container, data_name, h5_data[data_name],memo,loader) + assert py_container.convert() == data - # check that objects for which no loader has been registred or for which - # available loader raises NotHicklable exception are properly restored on load - # from corresponding copy protocol group or pickled data string - backup_dict_loader = lookup.types_dict[dict] - def fail_create_dict(py_obj,h_group,name,**kwargs): - raise helpers.NotHicklable("test loader shrugg") - lookup.types_dict[dict] = (fail_create_dict,backup_dict_loader[1],False) - data_name = "pickled_dict" - memo_backup = memo.pop(id(data),None) - with pytest.warns(lookup.SerializedWarning): - hickle._dump(data, h5_data, data_name,memo) - hickle._load(py_container, data_name, h5_data[data_name],memo) - assert py_container.convert() == data - lookup.types_dict[dict] = backup_dict_loader - memo[id(data)] = memo_backup + + # check that objects for which no loader has been registred or for which + # available loader raises NotHicklable exception are properly restored on load + # from corresponding copy protocol group or pickled data string + def fail_create_dict(py_obj,h_group,name,**kwargs): + raise helpers.NotHicklable("test loader shrugg") + loader.types_dict.maps.insert(0,{dict:(fail_create_dict,*loader.types_dict[dict][1:])}) + data_name = "pickled_dict" + memo_backup = memo.pop(id(data),None) + with pytest.warns(lookup.SerializedWarning): + hickle._dump(data, h5_data, data_name,memo,loader) + hickle._load(py_container, data_name, h5_data[data_name],memo,loader) + assert py_container.convert() == data + loader.types_dict.maps.pop(0) + memo[id(data)] = memo_backup # %% ISSUE RELATED TESTS