allow codec for compression for use with lindi
magland committed Sep 18, 2024
1 parent aed1de3 commit 0e6e07b
Showing 2 changed files with 26 additions and 44 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
@@ -1,5 +1,10 @@
 # HDMF Changelog
 
+## HDMF 3.14.6 (Upcoming)
+
+### Enhancements
+- Allow Codec for compression for use with LINDI
+
 ## HDMF 3.14.5 (September 17, 2024)
 
 ### Enhancements
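
A minimal sketch of what the new option enables (hypothetical usage, not part of this commit; it assumes numcodecs is installed, and the Codec only takes effect when the file is written through LINDI, since plain h5py does not accept Codec objects):

import numpy as np
from numcodecs import Zstd
from hdmf.backends.hdf5 import H5DataIO

# Wrap the data with a numcodecs Codec instance as the compression strategy.
wrapped = H5DataIO(
    data=np.arange(10_000, dtype="float32"),
    compression=Zstd(level=5),  # a Codec; only meaningful when writing via LINDI
    chunks=(1_000,),
)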
65 changes: 21 additions & 44 deletions src/hdmf/backends/hdf5/h5_utils.py
@@ -17,11 +17,11 @@
 import logging
 
 from ...array import Array
-from ...data_utils import DataIO, AbstractDataChunkIterator, append_data
+from ...data_utils import DataIO, AbstractDataChunkIterator
 from ...query import HDMFDataset, ReferenceResolver, ContainerResolver, BuilderResolver
 from ...region import RegionSlicer
 from ...spec import SpecWriter, SpecReader
-from ...utils import docval, getargs, popargs, get_docval, get_data_shape
+from ...utils import docval, getargs, popargs, get_docval
 
 
 class HDF5IODataChunkIteratorQueue(deque):
@@ -108,20 +108,6 @@ def ref(self):
     def shape(self):
         return self.dataset.shape
 
-    def append(self, arg):
-        # Get Builder
-        builder = self.io.manager.get_builder(arg)
-        if builder is None:
-            raise ValueError(
-                "The container being appended to the dataset has not yet been built. "
-                "Please write the container to the file, then open the modified file, and "
-                "append the read container to the dataset."
-            )
-
-        # Get HDF5 Reference
-        ref = self.io._create_ref(builder)
-        append_data(self.dataset, ref)
-
 
 class DatasetOfReferences(H5Dataset, ReferenceResolver, metaclass=ABCMeta):
     """
@@ -463,8 +449,8 @@ class H5DataIO(DataIO):
          'doc': 'Chunk shape or True to enable auto-chunking',
          'default': None},
         {'name': 'compression',
-         'type': (str, bool, int),
-         'doc': 'Compression strategy. If a bool is given, then gzip compression will be used by default.' +
+         'type': (str, bool, int, 'Codec'),
+         'doc': 'Compression strategy. If a bool is given, then gzip compression will be used by default. Codec only applies to LINDI.' +
                'http://docs.h5py.org/en/latest/high/dataset.html#dataset-compression',
          'default': None},
         {'name': 'compression_opts',
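
Note that 'Codec' enters the type tuple as a string: docval matches string type names against the class names in a value's MRO, which keeps numcodecs an optional dependency rather than a hard import in h5_utils.py. A rough illustration of that matching rule, assuming numcodecs is available (its GZip class subclasses numcodecs.abc.Codec):

from numcodecs import GZip

# Any class named 'Codec' in the value's MRO satisfies the string type 'Codec'.
value = GZip(level=4)
assert any(cls.__name__ == "Codec" for cls in type(value).__mro__)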
@@ -515,7 +501,7 @@ def __init__(self, **kwargs):
         # Check for possible collision with other parameters
         if not isinstance(getargs('data', kwargs), Dataset) and self.__link_data:
             self.__link_data = False
-            warnings.warn('link_data parameter in H5DataIO will be ignored', stacklevel=3)
+            warnings.warn('link_data parameter in H5DataIO will be ignored', stacklevel=2)
         # Call the super constructor and consume the data parameter
         super().__init__(**kwargs)
         # Construct the dict with the io args, ignoring all options that were set to None
@@ -539,22 +525,23 @@ def __init__(self, **kwargs):
                 self.__iosettings.pop('compression', None)
                 if 'compression_opts' in self.__iosettings:
                     warnings.warn('Compression disabled by compression=False setting. ' +
-                                  'compression_opts parameter will, therefore, be ignored.', stacklevel=3)
+                                  'compression_opts parameter will, therefore, be ignored.', stacklevel=2)
                     self.__iosettings.pop('compression_opts', None)
         # Validate the compression options used
         self._check_compression_options()
-        # Confirm that the compressor is supported by h5py
-        if not self.filter_available(self.__iosettings.get('compression', None),
-                                     self.__allow_plugin_filters):
-            msg = "%s compression may not be supported by this version of h5py." % str(self.__iosettings['compression'])
-            if not self.__allow_plugin_filters:
-                msg += " Set `allow_plugin_filters=True` to enable the use of dynamically-loaded plugin filters."
-            raise ValueError(msg)
+        # Confirm that the compressor is supported by h5py (unless we are using Codec with LINDI)
+        if isinstance(self.__iosettings.get('compression', None), str):
+            if not self.filter_available(self.__iosettings.get('compression', None),
+                                         self.__allow_plugin_filters):
+                msg = "%s compression may not be supported by this version of h5py." % str(self.__iosettings['compression'])
+                if not self.__allow_plugin_filters:
+                    msg += " Set `allow_plugin_filters=True` to enable the use of dynamically-loaded plugin filters."
+                raise ValueError(msg)
         # Check possible parameter collisions
         if isinstance(self.data, Dataset):
             for k in self.__iosettings.keys():
                 warnings.warn("%s in H5DataIO will be ignored with H5DataIO.data being an HDF5 dataset" % k,
-                              stacklevel=3)
+                              stacklevel=2)
 
         self.__dataset = None
 
@@ -628,11 +615,12 @@ def _check_compression_options(self):
                 if szip_opts_error:
                     raise ValueError("SZIP compression filter compression_opts"
                                      " must be a 2-tuple ('ec'|'nn', even integer 0-32).")
-        # Warn if compressor other than gzip is being used
-        if self.__iosettings['compression'] not in ['gzip', h5py_filters.h5z.FILTER_DEFLATE]:
-            warnings.warn(str(self.__iosettings['compression']) + " compression may not be available "
-                          "on all installations of HDF5. Use of gzip is recommended to ensure portability of "
-                          "the generated HDF5 files.", stacklevel=4)
+        # Warn if compressor other than gzip is being used (Unless we are using Codec with LINDI)
+        if isinstance(self.__iosettings['compression'], str):
+            if self.__iosettings['compression'] not in ['gzip', h5py_filters.h5z.FILTER_DEFLATE]:
+                warnings.warn(str(self.__iosettings['compression']) + " compression may not be available "
+                              "on all installations of HDF5. Use of gzip is recommended to ensure portability of "
+                              "the generated HDF5 files.", stacklevel=3)
 
     @staticmethod
     def filter_available(filter, allow_plugin_filters):
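
The net effect of the two isinstance guards above, sketched as a hypothetical contrast (not from this diff): a string compressor is still validated against the filters h5py knows about, while a Codec instance skips both the portability warning and the availability check, because only LINDI can apply it:

import numpy as np
from numcodecs import Zstd
from hdmf.backends.hdf5 import H5DataIO

H5DataIO(data=np.zeros(10), compression=Zstd())         # Codec: no h5py filter check
H5DataIO(data=np.zeros(10), compression="bogus-codec")  # str: raises ValueError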
@@ -672,14 +660,3 @@ def valid(self):
         if isinstance(self.data, Dataset) and not self.data.id.valid:
             return False
         return super().valid
-
-    @property
-    def maxshape(self):
-        if 'maxshape' in self.io_settings:
-            return self.io_settings['maxshape']
-        elif hasattr(self.data, 'maxshape'):
-            return self.data.maxshape
-        elif hasattr(self, "shape"):
-            return self.shape
-        else:
-            return get_data_shape(self.data)