Skip to content

Commit

Permalink
Remove references to hdmf.Array and region references (#236)
Browse files Browse the repository at this point in the history
  • Loading branch information
rly authored Nov 18, 2024
1 parent b69c3e0 commit 5a9055d
Show file tree
Hide file tree
Showing 3 changed files with 8 additions and 91 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
* Added support for Pathlib paths. @mavaylon1 [#212](https://github.com/hdmf-dev/hdmf-zarr/pull/212)
* Updated packages used for testing and readthedocs configuration. @mavaylon1, @rly [#214](https://github.com/hdmf-dev/hdmf-zarr/pull/214)
* Add `force_overwite` parameter for `ZarrIO.__init__` to allow overwriting an existing file or directory. @oruebel [#229](https://github.com/hdmf-dev/hdmf-zarr/pull/229)
* Remove allowance of `hdmf.Array` in `__init__` of `AbstractZarrTableDataset` and `ZarrDataset` to be compatible with HDMF 4.0. @rly [#236](https://github.com/hdmf-dev/hdmf-zarr/pull/236)

### Bug Fixes
* Fix reading of cached specs and caching of specs during export. @rly [#232](https://github.com/hdmf-dev/hdmf-zarr/pull/232)
Expand Down
44 changes: 4 additions & 40 deletions src/hdmf_zarr/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@
DatasetBuilder,
LinkBuilder,
BuildManager,
RegionBuilder,
ReferenceBuilder,
TypeMap)
from hdmf.data_utils import AbstractDataChunkIterator
Expand Down Expand Up @@ -634,10 +633,6 @@ def write_attributes(self, **kwargs):
raise TypeError(str(e) + " type=" + str(type(value)) + " data=" + str(value)) from e
# Case 2: References
elif isinstance(value, (Container, Builder, ReferenceBuilder)):
# TODO: Region References are not yet supported
# if isinstance(value, RegionBuilder):
# type_str = 'region'
# refs = self._create_ref(value.builder)
if isinstance(value, (ReferenceBuilder, Container, Builder)):
type_str = 'object'
if isinstance(value, Builder):
Expand Down Expand Up @@ -741,7 +736,7 @@ def __is_ref(self, dtype):
elif isinstance(dtype, np.dtype):
return False
else:
return dtype == DatasetBuilder.OBJECT_REF_TYPE or dtype == DatasetBuilder.REGION_REF_TYPE
return dtype == DatasetBuilder.OBJECT_REF_TYPE

def resolve_ref(self, zarr_ref):
"""
Expand Down Expand Up @@ -792,8 +787,6 @@ def _create_ref(self, ref_object, export_source=None):
:type ref_object: Builder, Container, ReferenceBuilder
:returns: ZarrReference object
"""
if isinstance(ref_object, RegionBuilder): # or region is not None: TODO: Add to support regions
raise NotImplementedError("Region references are currently not supported by ZarrIO")
if isinstance(ref_object, Builder):
if isinstance(ref_object, LinkBuilder):
builder = ref_object.target_builder
Expand All @@ -805,11 +798,6 @@ def _create_ref(self, ref_object, export_source=None):
builder = self.manager.build(ref_object)

path = self.__get_path(builder)
# TODO Add to get region for region references.
# Also add {'name': 'region', 'type': (slice, list, tuple),
# 'doc': 'the region reference indexing object', 'default': None},
# if isinstance(ref_object, RegionBuilder):
# region = ref_object.region

# get the object id if available
object_id = builder.get('object_id', None)
Expand Down Expand Up @@ -1061,12 +1049,7 @@ def write_dataset(self, **kwargs): # noqa: C901
for i, dts in enumerate(options['dtype']):
if self.__is_ref(dts['dtype']):
refs.append(i)
ref_tmp = self._create_ref(data[0][i], export_source=export_source)
if isinstance(ref_tmp, ZarrReference):
dts_str = 'object'
else:
dts_str = 'region'
type_str.append({'name': dts['name'], 'dtype': dts_str})
type_str.append({'name': dts['name'], 'dtype': 'object'})
else:
i = list([dts, ])
t = self.__resolve_dtype_helper__(i)
Expand Down Expand Up @@ -1122,20 +1105,10 @@ def write_dataset(self, **kwargs): # noqa: C901
dset = self.__list_fill__(parent, name, data, options)
# Write a dataset of references
elif self.__is_ref(options['dtype']):
# TODO Region references are not yet support, but here how the code should look
# if isinstance(data, RegionBuilder):
# shape = (1,)
# type_str = 'region'
# refs = self._create_ref(data.builder, data.region)
if isinstance(data, ReferenceBuilder):
shape = (1,)
type_str = 'object'
refs = self._create_ref(data.builder, export_source=export_source)
# TODO: Region References are not yet supported
# elif options['dtype'] == 'region':
# shape = (len(data), )
# type_str = 'region'
# refs = [self._create_ref(item.builder, item.region) for item in data]
else:
shape = (len(data), )
type_str = 'object'
Expand Down Expand Up @@ -1203,7 +1176,6 @@ def write_dataset(self, **kwargs): # noqa: C901
"ref": ZarrReference,
"reference": ZarrReference,
"object": ZarrReference,
"region": ZarrReference,
}

@classmethod
Expand Down Expand Up @@ -1518,20 +1490,15 @@ def __read_dataset(self, zarr_obj, name):
# Check compound dataset where one of the subsets contains references
has_reference = False
for i, dts in enumerate(dtype):
if dts['dtype'] in ['object', 'region']: # check items for object reference
if dts['dtype'] == 'object': # check items for object reference
has_reference = True
break
retrieved_dtypes = [dtype_dict['dtype'] for dtype_dict in dtype]
if has_reference:
# TODO: BuilderZarrTableDataset does not yet support region reference
data = BuilderZarrTableDataset(zarr_obj, self, retrieved_dtypes)
elif self.__is_ref(dtype):
# Array of references
if dtype == 'object':
data = BuilderZarrReferenceDataset(data, self)
# TODO: Resolution of Region reference not yet supported by BuilderZarrRegionDataset
# elif dtype == 'region':
# data = BuilderZarrRegionDataset(data, self)
data = BuilderZarrReferenceDataset(data, self)

kwargs['data'] = data
if name is None:
Expand All @@ -1554,9 +1521,6 @@ def __read_attrs(self, zarr_obj):
ret[k] = self.__read_group(target_zarr_obj, target_name)
else:
ret[k] = self.__read_dataset(target_zarr_obj, target_name)
# TODO Need to implement region references for attributes
elif v['zarr_dtype'] == 'region':
raise NotImplementedError("Read of region references from attributes not implemented in ZarrIO")
else:
raise NotImplementedError("Unsupported zarr_dtype for attribute " + str(v))
else:
Expand Down
54 changes: 3 additions & 51 deletions src/hdmf_zarr/zarr_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,10 @@
from copy import copy
import numpy as np

from zarr import Array as ZarrArray
from zarr import Array

from hdmf.build import DatasetBuilder
from hdmf.data_utils import append_data
from hdmf.array import Array
from hdmf.query import HDMFDataset, ReferenceResolver, ContainerResolver, BuilderResolver
from hdmf.utils import docval, popargs, get_docval

Expand All @@ -21,7 +20,7 @@ class ZarrDataset(HDMFDataset):
Extension of HDMFDataset to add Zarr compatibility
"""

@docval({'name': 'dataset', 'type': (np.ndarray, ZarrArray, Array), 'doc': 'the Zarr file lazily evaluate'},
@docval({'name': 'dataset', 'type': (np.ndarray, Array), 'doc': 'the Zarr file lazily evaluate'},
{'name': 'io', 'type': 'ZarrIO', 'doc': 'the IO object that was used to read the underlying dataset'})
def __init__(self, **kwargs):
self.__io = popargs('io', kwargs)
Expand Down Expand Up @@ -130,7 +129,7 @@ class AbstractZarrTableDataset(DatasetOfReferences):
references in compound datasets to either Builders and Containers.
"""

@docval({'name': 'dataset', 'type': (np.ndarray, ZarrArray, Array), 'doc': 'the Zarr file lazily evaluate'},
@docval({'name': 'dataset', 'type': (np.ndarray, Array), 'doc': 'the Zarr file lazily evaluate'},
{'name': 'io', 'type': 'ZarrIO', 'doc': 'the IO object that was used to read the underlying dataset'},
{'name': 'types', 'type': (list, tuple),
'doc': 'the list/tuple of reference types'})
Expand All @@ -139,8 +138,6 @@ def __init__(self, **kwargs):
super().__init__(**kwargs)
self.__refgetters = dict()
for i, t in enumerate(types):
# if t is RegionReference: # TODO: Region References not yet supported
# self.__refgetters[i] = self.__get_regref
if t == DatasetBuilder.OBJECT_REF_TYPE:
self.__refgetters[i] = self._get_ref
elif t is str:
Expand All @@ -154,7 +151,6 @@ def __init__(self, **kwargs):
sub = self.dataset.dtype[i]
if np.issubdtype(sub, np.dtype('O')):
tmp.append('object')
# TODO: Region References are not yet supported
if sub.metadata:
if 'vlen' in sub.metadata:
t = sub.metadata['vlen']
Expand Down Expand Up @@ -224,24 +220,6 @@ def dtype(self):
return 'object'


class AbstractZarrRegionDataset(AbstractZarrReferenceDataset):
"""
Extension of DatasetOfReferences to serve as the base class for resolving Zarr
references in datasets to either Builders and Containers.
Note: Region References are not yet supported.
"""

def __getitem__(self, arg):
obj = super().__getitem__(arg)
ref = self.dataset[arg]
return obj[ref]

@property
def dtype(self):
return 'region'


class ContainerZarrTableDataset(ContainerResolverMixin, AbstractZarrTableDataset):
"""
A reference-resolving dataset for resolving references inside tables
Expand Down Expand Up @@ -284,29 +262,3 @@ class BuilderZarrReferenceDataset(BuilderResolverMixin, AbstractZarrReferenceDat
@classmethod
def get_inverse_class(cls):
return ContainerZarrReferenceDataset


class ContainerZarrRegionDataset(ContainerResolverMixin, AbstractZarrRegionDataset):
"""
A reference-resolving dataset for resolving region references that returns
resolved references as Containers
Note: Region References are not yet supported.
"""

@classmethod
def get_inverse_class(cls):
return BuilderZarrRegionDataset


class BuilderZarrRegionDataset(BuilderResolverMixin, AbstractZarrRegionDataset):
"""
A reference-resolving dataset for resolving region references that returns
resolved references as Builders.
Note: Region References are not yet supported.
"""

@classmethod
def get_inverse_class(cls):
return ContainerZarrRegionDataset

0 comments on commit 5a9055d

Please sign in to comment.