From 0e6e07b3a6aee476e1a21fbfeb257a5a8ff7295e Mon Sep 17 00:00:00 2001
From: Jeremy Magland
Date: Wed, 18 Sep 2024 16:02:20 -0400
Subject: [PATCH] allow codec for compression for use with lindi

---
 CHANGELOG.md                       |  5 +++
 src/hdmf/backends/hdf5/h5_utils.py | 65 ++++++++++--------------------
 2 files changed, 26 insertions(+), 44 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 7e56cde40..c75e344ed 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,10 @@
 # HDMF Changelog
 
+## HDMF 3.14.6 (Upcoming)
+
+### Enhancements
+- Allow a `Codec` object as the `compression` option of `H5DataIO`, for use with LINDI
+
 ## HDMF 3.14.5 (September 17, 2024)
 
 ### Enhancements
diff --git a/src/hdmf/backends/hdf5/h5_utils.py b/src/hdmf/backends/hdf5/h5_utils.py
index 2d7187721..b4f148a90 100644
--- a/src/hdmf/backends/hdf5/h5_utils.py
+++ b/src/hdmf/backends/hdf5/h5_utils.py
@@ -17,11 +17,11 @@
 import logging
 
 from ...array import Array
-from ...data_utils import DataIO, AbstractDataChunkIterator, append_data
+from ...data_utils import DataIO, AbstractDataChunkIterator
 from ...query import HDMFDataset, ReferenceResolver, ContainerResolver, BuilderResolver
 from ...region import RegionSlicer
 from ...spec import SpecWriter, SpecReader
-from ...utils import docval, getargs, popargs, get_docval, get_data_shape
+from ...utils import docval, getargs, popargs, get_docval
 
 
 class HDF5IODataChunkIteratorQueue(deque):
@@ -108,20 +108,6 @@ def ref(self):
     def shape(self):
         return self.dataset.shape
 
-    def append(self, arg):
-        # Get Builder
-        builder = self.io.manager.get_builder(arg)
-        if builder is None:
-            raise ValueError(
-                "The container being appended to the dataset has not yet been built. "
-                "Please write the container to the file, then open the modified file, and "
-                "append the read container to the dataset."
-            )
-
-        # Get HDF5 Reference
-        ref = self.io._create_ref(builder)
-        append_data(self.dataset, ref)
-
 
 class DatasetOfReferences(H5Dataset, ReferenceResolver, metaclass=ABCMeta):
     """
@@ -463,8 +449,8 @@ class H5DataIO(DataIO):
              'doc': 'Chunk shape or True to enable auto-chunking',
              'default': None},
             {'name': 'compression',
-             'type': (str, bool, int),
-             'doc': 'Compression strategy. If a bool is given, then gzip compression will be used by default.' +
+             'type': (str, bool, int, 'Codec'),
+             'doc': 'Compression strategy. If a bool is given, then gzip compression will be used by default. A Codec applies only when writing with LINDI. ' +
                     'http://docs.h5py.org/en/latest/high/dataset.html#dataset-compression',
              'default': None},
            {'name': 'compression_opts',
@@ -515,7 +501,7 @@ def __init__(self, **kwargs):
         # Check for possible collision with other parameters
         if not isinstance(getargs('data', kwargs), Dataset) and self.__link_data:
             self.__link_data = False
-            warnings.warn('link_data parameter in H5DataIO will be ignored', stacklevel=3)
+            warnings.warn('link_data parameter in H5DataIO will be ignored', stacklevel=2)
         # Call the super constructor and consume the data parameter
         super().__init__(**kwargs)
         # Construct the dict with the io args, ignoring all options that were set to None
@@ -539,22 +525,23 @@ def __init__(self, **kwargs):
             self.__iosettings.pop('compression', None)
             if 'compression_opts' in self.__iosettings:
                 warnings.warn('Compression disabled by compression=False setting. ' +
-                              'compression_opts parameter will, therefore, be ignored.', stacklevel=3)
+                              'compression_opts parameter will, therefore, be ignored.', stacklevel=2)
                 self.__iosettings.pop('compression_opts', None)
         # Validate the compression options used
         self._check_compression_options()
-        # Confirm that the compressor is supported by h5py
-        if not self.filter_available(self.__iosettings.get('compression', None),
-                                     self.__allow_plugin_filters):
-            msg = "%s compression may not be supported by this version of h5py." % str(self.__iosettings['compression'])
-            if not self.__allow_plugin_filters:
-                msg += " Set `allow_plugin_filters=True` to enable the use of dynamically-loaded plugin filters."
-            raise ValueError(msg)
+        # Confirm that the compressor is supported by h5py (unless we are using a Codec with LINDI)
+        if isinstance(self.__iosettings.get('compression', None), str):
+            if not self.filter_available(self.__iosettings.get('compression', None),
+                                         self.__allow_plugin_filters):
+                msg = "%s compression may not be supported by this version of h5py." % str(self.__iosettings['compression'])
+                if not self.__allow_plugin_filters:
+                    msg += " Set `allow_plugin_filters=True` to enable the use of dynamically-loaded plugin filters."
+                raise ValueError(msg)
         # Check possible parameter collisions
         if isinstance(self.data, Dataset):
             for k in self.__iosettings.keys():
                 warnings.warn("%s in H5DataIO will be ignored with H5DataIO.data being an HDF5 dataset" % k,
-                              stacklevel=3)
+                              stacklevel=2)
         self.__dataset = None
@@ -628,11 +615,12 @@ def _check_compression_options(self):
                     if szip_opts_error:
                         raise ValueError("SZIP compression filter compression_opts"
                                          " must be a 2-tuple ('ec'|'nn', even integer 0-32).")
-        # Warn if compressor other than gzip is being used
-        if self.__iosettings['compression'] not in ['gzip', h5py_filters.h5z.FILTER_DEFLATE]:
-            warnings.warn(str(self.__iosettings['compression']) + " compression may not be available "
-                          "on all installations of HDF5. Use of gzip is recommended to ensure portability of "
-                          "the generated HDF5 files.", stacklevel=4)
+        # Warn if compressor other than gzip is being used (unless we are using a Codec with LINDI)
+        if isinstance(self.__iosettings['compression'], str):
+            if self.__iosettings['compression'] not in ['gzip', h5py_filters.h5z.FILTER_DEFLATE]:
+                warnings.warn(str(self.__iosettings['compression']) + " compression may not be available "
+                              "on all installations of HDF5. Use of gzip is recommended to ensure portability of "
+                              "the generated HDF5 files.", stacklevel=3)
 
     @staticmethod
     def filter_available(filter, allow_plugin_filters):
@@ -672,14 +660,3 @@ def valid(self):
         if isinstance(self.data, Dataset) and not self.data.id.valid:
             return False
         return super().valid
-
-    @property
-    def maxshape(self):
-        if 'maxshape' in self.io_settings:
-            return self.io_settings['maxshape']
-        elif hasattr(self.data, 'maxshape'):
-            return self.data.maxshape
-        elif hasattr(self, "shape"):
-            return self.shape
-        else:
-            return get_data_shape(self.data)
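
Usage sketch (an illustration, not part of the patch): with this change, a
numcodecs Codec instance passes H5DataIO's docval check for `compression`.
The specifics below (Blosc, the chunk shape) are assumptions about how this
would typically be exercised; a Codec is only meaningful when the file is
written through LINDI, since plain h5py will not accept a Codec object as a
compression filter.

    # Hypothetical illustration: wrap an array with a numcodecs Codec as the
    # compression strategy. With this patch, H5DataIO accepts the Codec and
    # skips both the h5py filter-availability check and the non-gzip warning.
    import numpy as np
    from numcodecs import Blosc  # Blosc subclasses numcodecs.abc.Codec

    from hdmf.backends.hdf5.h5_utils import H5DataIO

    wrapped = H5DataIO(
        data=np.arange(100_000, dtype=np.int32),
        compression=Blosc(cname="zstd", clevel=5),  # a Codec instance, not a str
        chunks=(10_000,),
    )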
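
Design note: the docval type is given as the string 'Codec' rather than the
class itself, which avoids a hard dependency on numcodecs. docval matches a
string type against the class names in the value's method-resolution order,
so any subclass of numcodecs.abc.Codec qualifies. A minimal sketch of that
name-based check (an illustration, not hdmf's actual implementation in
hdmf.utils):

    # Name-based type check in the spirit of docval string types.
    def matches_type_name(value, type_name):
        """Return True if any class in value's MRO is named type_name."""
        return any(cls.__name__ == type_name for cls in type(value).__mro__)

    # e.g. matches_type_name(Blosc(cname="zstd"), 'Codec') is True because
    # Blosc inherits from numcodecs.abc.Codec; numcodecs itself never needs
    # to be imported for the check.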