Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove references to hdmf.Array and region references #236

Merged
merged 6 commits into from
Nov 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
* Added support for Pathlib paths. @mavaylon1 [#212](https://github.com/hdmf-dev/hdmf-zarr/pull/212)
* Updated packages used for testing and readthedocs configuration. @mavaylon1, @rly [#214](https://github.com/hdmf-dev/hdmf-zarr/pull/214)
* Add `force_overwite` parameter for `ZarrIO.__init__` to allow overwriting an existing file or directory. @oruebel [#229](https://github.com/hdmf-dev/hdmf-zarr/pull/229)
* Remove allowance of `hdmf.Array` in `__init__` of `AbstractZarrTableDataset` and `ZarrDataset` to be compatible with HDMF 4.0. @rly [#236](https://github.com/hdmf-dev/hdmf-zarr/pull/236)

### Bug Fixes
* Fix reading of cached specs and caching of specs during export. @rly [#232](https://github.com/hdmf-dev/hdmf-zarr/pull/232)
Expand Down
44 changes: 4 additions & 40 deletions src/hdmf_zarr/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@
DatasetBuilder,
LinkBuilder,
BuildManager,
RegionBuilder,
ReferenceBuilder,
TypeMap)
from hdmf.data_utils import AbstractDataChunkIterator
Expand Down Expand Up @@ -634,10 +633,6 @@ def write_attributes(self, **kwargs):
raise TypeError(str(e) + " type=" + str(type(value)) + " data=" + str(value)) from e
# Case 2: References
elif isinstance(value, (Container, Builder, ReferenceBuilder)):
# TODO: Region References are not yet supported
# if isinstance(value, RegionBuilder):
# type_str = 'region'
# refs = self._create_ref(value.builder)
if isinstance(value, (ReferenceBuilder, Container, Builder)):
type_str = 'object'
if isinstance(value, Builder):
Expand Down Expand Up @@ -741,7 +736,7 @@ def __is_ref(self, dtype):
elif isinstance(dtype, np.dtype):
return False
else:
return dtype == DatasetBuilder.OBJECT_REF_TYPE or dtype == DatasetBuilder.REGION_REF_TYPE
return dtype == DatasetBuilder.OBJECT_REF_TYPE

def resolve_ref(self, zarr_ref):
"""
Expand Down Expand Up @@ -792,8 +787,6 @@ def _create_ref(self, ref_object, export_source=None):
:type ref_object: Builder, Container, ReferenceBuilder
:returns: ZarrReference object
"""
if isinstance(ref_object, RegionBuilder): # or region is not None: TODO: Add to support regions
raise NotImplementedError("Region references are currently not supported by ZarrIO")
if isinstance(ref_object, Builder):
if isinstance(ref_object, LinkBuilder):
builder = ref_object.target_builder
Expand All @@ -805,11 +798,6 @@ def _create_ref(self, ref_object, export_source=None):
builder = self.manager.build(ref_object)

path = self.__get_path(builder)
# TODO Add to get region for region references.
# Also add {'name': 'region', 'type': (slice, list, tuple),
# 'doc': 'the region reference indexing object', 'default': None},
# if isinstance(ref_object, RegionBuilder):
# region = ref_object.region

# get the object id if available
object_id = builder.get('object_id', None)
Expand Down Expand Up @@ -1061,12 +1049,7 @@ def write_dataset(self, **kwargs): # noqa: C901
for i, dts in enumerate(options['dtype']):
if self.__is_ref(dts['dtype']):
refs.append(i)
ref_tmp = self._create_ref(data[0][i], export_source=export_source)
if isinstance(ref_tmp, ZarrReference):
dts_str = 'object'
else:
dts_str = 'region'
type_str.append({'name': dts['name'], 'dtype': dts_str})
type_str.append({'name': dts['name'], 'dtype': 'object'})
else:
i = list([dts, ])
t = self.__resolve_dtype_helper__(i)
Expand Down Expand Up @@ -1122,20 +1105,10 @@ def write_dataset(self, **kwargs): # noqa: C901
dset = self.__list_fill__(parent, name, data, options)
# Write a dataset of references
elif self.__is_ref(options['dtype']):
# TODO Region references are not yet support, but here how the code should look
# if isinstance(data, RegionBuilder):
# shape = (1,)
# type_str = 'region'
# refs = self._create_ref(data.builder, data.region)
if isinstance(data, ReferenceBuilder):
shape = (1,)
type_str = 'object'
refs = self._create_ref(data.builder, export_source=export_source)
# TODO: Region References are not yet supported
# elif options['dtype'] == 'region':
# shape = (len(data), )
# type_str = 'region'
# refs = [self._create_ref(item.builder, item.region) for item in data]
else:
shape = (len(data), )
type_str = 'object'
Expand Down Expand Up @@ -1203,7 +1176,6 @@ def write_dataset(self, **kwargs): # noqa: C901
"ref": ZarrReference,
"reference": ZarrReference,
"object": ZarrReference,
"region": ZarrReference,
}

@classmethod
Expand Down Expand Up @@ -1518,20 +1490,15 @@ def __read_dataset(self, zarr_obj, name):
# Check compound dataset where one of the subsets contains references
has_reference = False
for i, dts in enumerate(dtype):
if dts['dtype'] in ['object', 'region']: # check items for object reference
if dts['dtype'] == 'object': # check items for object reference
has_reference = True
break
retrieved_dtypes = [dtype_dict['dtype'] for dtype_dict in dtype]
if has_reference:
# TODO: BuilderZarrTableDataset does not yet support region reference
data = BuilderZarrTableDataset(zarr_obj, self, retrieved_dtypes)
elif self.__is_ref(dtype):
# Array of references
if dtype == 'object':
data = BuilderZarrReferenceDataset(data, self)
# TODO: Resolution of Region reference not yet supported by BuilderZarrRegionDataset
# elif dtype == 'region':
# data = BuilderZarrRegionDataset(data, self)
data = BuilderZarrReferenceDataset(data, self)

kwargs['data'] = data
if name is None:
Expand All @@ -1554,9 +1521,6 @@ def __read_attrs(self, zarr_obj):
ret[k] = self.__read_group(target_zarr_obj, target_name)
else:
ret[k] = self.__read_dataset(target_zarr_obj, target_name)
# TODO Need to implement region references for attributes
elif v['zarr_dtype'] == 'region':
raise NotImplementedError("Read of region references from attributes not implemented in ZarrIO")
else:
raise NotImplementedError("Unsupported zarr_dtype for attribute " + str(v))
else:
Expand Down
54 changes: 3 additions & 51 deletions src/hdmf_zarr/zarr_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,10 @@
from copy import copy
import numpy as np

from zarr import Array as ZarrArray
from zarr import Array

from hdmf.build import DatasetBuilder
from hdmf.data_utils import append_data
from hdmf.array import Array
from hdmf.query import HDMFDataset, ReferenceResolver, ContainerResolver, BuilderResolver
from hdmf.utils import docval, popargs, get_docval

Expand All @@ -21,7 +20,7 @@ class ZarrDataset(HDMFDataset):
Extension of HDMFDataset to add Zarr compatibility
"""

@docval({'name': 'dataset', 'type': (np.ndarray, ZarrArray, Array), 'doc': 'the Zarr file lazily evaluate'},
@docval({'name': 'dataset', 'type': (np.ndarray, Array), 'doc': 'the Zarr file lazily evaluate'},
{'name': 'io', 'type': 'ZarrIO', 'doc': 'the IO object that was used to read the underlying dataset'})
def __init__(self, **kwargs):
self.__io = popargs('io', kwargs)
Expand Down Expand Up @@ -130,7 +129,7 @@ class AbstractZarrTableDataset(DatasetOfReferences):
references in compound datasets to either Builders and Containers.
"""

@docval({'name': 'dataset', 'type': (np.ndarray, ZarrArray, Array), 'doc': 'the Zarr file lazily evaluate'},
@docval({'name': 'dataset', 'type': (np.ndarray, Array), 'doc': 'the Zarr file lazily evaluate'},
{'name': 'io', 'type': 'ZarrIO', 'doc': 'the IO object that was used to read the underlying dataset'},
{'name': 'types', 'type': (list, tuple),
'doc': 'the list/tuple of reference types'})
Expand All @@ -139,8 +138,6 @@ def __init__(self, **kwargs):
super().__init__(**kwargs)
self.__refgetters = dict()
for i, t in enumerate(types):
# if t is RegionReference: # TODO: Region References not yet supported
# self.__refgetters[i] = self.__get_regref
if t == DatasetBuilder.OBJECT_REF_TYPE:
self.__refgetters[i] = self._get_ref
elif t is str:
Expand All @@ -154,7 +151,6 @@ def __init__(self, **kwargs):
sub = self.dataset.dtype[i]
if np.issubdtype(sub, np.dtype('O')):
tmp.append('object')
# TODO: Region References are not yet supported
if sub.metadata:
if 'vlen' in sub.metadata:
t = sub.metadata['vlen']
Expand Down Expand Up @@ -224,24 +220,6 @@ def dtype(self):
return 'object'


class AbstractZarrRegionDataset(AbstractZarrReferenceDataset):
"""
Extension of DatasetOfReferences to serve as the base class for resolving Zarr
references in datasets to either Builders and Containers.

Note: Region References are not yet supported.
"""

def __getitem__(self, arg):
obj = super().__getitem__(arg)
ref = self.dataset[arg]
return obj[ref]

@property
def dtype(self):
return 'region'


class ContainerZarrTableDataset(ContainerResolverMixin, AbstractZarrTableDataset):
"""
A reference-resolving dataset for resolving references inside tables
Expand Down Expand Up @@ -284,29 +262,3 @@ class BuilderZarrReferenceDataset(BuilderResolverMixin, AbstractZarrReferenceDat
@classmethod
def get_inverse_class(cls):
return ContainerZarrReferenceDataset


class ContainerZarrRegionDataset(ContainerResolverMixin, AbstractZarrRegionDataset):
"""
A reference-resolving dataset for resolving region references that returns
resolved references as Containers

Note: Region References are not yet supported.
"""

@classmethod
def get_inverse_class(cls):
return BuilderZarrRegionDataset


class BuilderZarrRegionDataset(BuilderResolverMixin, AbstractZarrRegionDataset):
"""
A reference-resolving dataset for resolving region references that returns
resolved references as Builders.

Note: Region References are not yet supported.
"""

@classmethod
def get_inverse_class(cls):
return ContainerZarrRegionDataset
Loading