diff --git a/docs/api.rst b/docs/api.rst
index 1e4015460b..8162ada965 100644
--- a/docs/api.rst
+++ b/docs/api.rst
@@ -8,6 +8,7 @@ API reference
     api/core
     api/hierarchy
     api/storage
+    api/n5
     api/convenience
     api/codecs
     api/attrs
diff --git a/docs/api/n5.rst b/docs/api/n5.rst
new file mode 100644
index 0000000000..b6a8d8c61e
--- /dev/null
+++ b/docs/api/n5.rst
@@ -0,0 +1,5 @@
+N5 (``zarr.n5``)
+================
+.. automodule:: zarr.n5
+
+.. autoclass:: N5Store
diff --git a/docs/release.rst b/docs/release.rst
index 49861268d4..64871f9a2f 100644
--- a/docs/release.rst
+++ b/docs/release.rst
@@ -34,6 +34,11 @@ Enhancements
   MongoDB database to be used as the backing store for an array or group.
   By :user:`Joe Hamman `, :issue:`299`, :issue:`372`.
 
+* **New storage class for N5 containers**. The :class:`zarr.n5.N5Store` has been
+  added, which uses :class:`zarr.storage.NestedDirectoryStore` to support
+  reading from and writing to N5 containers.
+  By :user:`Jan Funke ` and :user:`John Kirkham `.
+
 Bug fixes
 ~~~~~~~~~
 
diff --git a/docs/tutorial.rst b/docs/tutorial.rst
index 3e8e9bac66..85e3675972 100644
--- a/docs/tutorial.rst
+++ b/docs/tutorial.rst
@@ -746,6 +746,20 @@ with `MongoDB `_ (an object oriented NoSQL database). T
 respectively require the `redis `_ and
 `pymongo `_ packages to be installed.
 
+For compatibility with the `N5 <https://github.com/saalfeldlab/n5>`_ data format, Zarr
+also provides an N5 backend (currently an experimental feature). Similar to the zip
+storage class, an :class:`zarr.n5.N5Store` can be instantiated directly::
+
+    >>> store = zarr.N5Store('data/example.n5')
+    >>> root = zarr.group(store=store)
+    >>> z = root.zeros('foo/bar', shape=(1000, 1000), chunks=(100, 100), dtype='i4')
+    >>> z[:] = 42
+
+For convenience, the N5 backend will automatically be chosen when the filename
+ends with `.n5`::
+
+    >>> root = zarr.open('data/example.n5', mode='w')
+
 Distributed/cloud storage
 ~~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/zarr/__init__.py b/zarr/__init__.py
index e208b8ae82..6a1fdc52cc 100644
--- a/zarr/__init__.py
+++ b/zarr/__init__.py
@@ -15,5 +15,6 @@ from zarr.convenience import (open, save, save_array, save_group, load,
                               copy_store, copy, copy_all, tree, consolidate_metadata,
                               open_consolidated)
+from zarr.n5 import N5Store
 from zarr.errors import CopyError, MetadataError, PermissionError
 from zarr.version import version as __version__
diff --git a/zarr/creation.py b/zarr/creation.py
index 0184a4a5da..79cc62fb61 100644
--- a/zarr/creation.py
+++ b/zarr/creation.py
@@ -9,6 +9,7 @@ from zarr.core import Array
 from zarr.storage import (DirectoryStore, init_array, contains_array, contains_group,
                           default_compressor, normalize_storage_path, ZipStore)
+from zarr.n5 import N5Store
 from numcodecs.registry import codec_registry
 from zarr.errors import err_contains_array, err_contains_group, err_array_not_found
 
@@ -132,6 +133,8 @@ def normalize_store_arg(store, clobber=False, default=dict):
         if store.endswith('.zip'):
             mode = 'w' if clobber else 'a'
             return ZipStore(store, mode=mode)
+        elif store.endswith('.n5'):
+            return N5Store(store)
         else:
             return DirectoryStore(store)
     else:
diff --git a/zarr/n5.py b/zarr/n5.py
new file mode 100644
index 0000000000..8139464dc6
--- /dev/null
+++ b/zarr/n5.py
@@ -0,0 +1,653 @@
+# -*- coding: utf-8 -*-
+"""This module contains a storage class and codec to support the N5 format.
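+
+A rough usage sketch (the path is illustrative; see the examples on
+:class:`N5Store` below for more)::
+
+    >>> import zarr
+    >>> store = zarr.N5Store('data/example.n5')
+    >>> z = zarr.zeros((10, 10), chunks=(5, 5), store=store, overwrite=True)
+    >>> z[...] = 42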
+""" +from __future__ import absolute_import, division +from .meta import ZARR_FORMAT, ensure_str, json_dumps +from .storage import ( + NestedDirectoryStore, + group_meta_key as zarr_group_meta_key, + array_meta_key as zarr_array_meta_key, + attrs_key as zarr_attrs_key, + _prog_ckey, _prog_number) +from numcodecs.abc import Codec +from numcodecs.compat import ndarray_copy +from numcodecs.registry import register_codec, get_codec +import json +import numpy as np +import struct +import sys +import os +import warnings + + +zarr_to_n5_keys = [ + ('chunks', 'blockSize'), + ('dtype', 'dataType'), + ('compressor', 'compression'), + ('shape', 'dimensions') +] +n5_attrs_key = 'attributes.json' +n5_keywords = ['n5', 'dataType', 'dimensions', 'blockSize', 'compression'] + + +class N5Store(NestedDirectoryStore): + """Storage class using directories and files on a standard file system, + following the N5 format (https://github.com/saalfeldlab/n5). + + Parameters + ---------- + path : string + Location of directory to use as the root of the storage hierarchy. + + Examples + -------- + Store a single array:: + + >>> import zarr + >>> store = zarr.N5Store('data/array.n5') + >>> z = zarr.zeros((10, 10), chunks=(5, 5), store=store, overwrite=True) + >>> z[...] = 42 + + Store a group:: + + >>> store = zarr.N5Store('data/group.n5') + >>> root = zarr.group(store=store, overwrite=True) + >>> foo = root.create_group('foo') + >>> bar = foo.zeros('bar', shape=(10, 10), chunks=(5, 5)) + >>> bar[...] = 42 + + Notes + ----- + + This is an experimental feature. + + Safe to write in multiple threads or processes. + + """ + + def __getitem__(self, key): + + if key.endswith(zarr_group_meta_key): + + key = key.replace(zarr_group_meta_key, n5_attrs_key) + value = group_metadata_to_zarr(self._load_n5_attrs(key)) + + return json_dumps(value).encode('ascii') + + elif key.endswith(zarr_array_meta_key): + + key = key.replace(zarr_array_meta_key, n5_attrs_key) + value = array_metadata_to_zarr(self._load_n5_attrs(key)) + + return json_dumps(value).encode('ascii') + + elif key.endswith(zarr_attrs_key): + + key = key.replace(zarr_attrs_key, n5_attrs_key) + value = attrs_to_zarr(self._load_n5_attrs(key)) + + if len(value) == 0: + raise KeyError(key) + else: + return json_dumps(value).encode('ascii') + + elif is_chunk_key(key): + + key = invert_chunk_coords(key) + + return super(N5Store, self).__getitem__(key) + + def __setitem__(self, key, value): + + if key.endswith(zarr_group_meta_key): + + key = key.replace(zarr_group_meta_key, n5_attrs_key) + + value = ensure_str(value) + n5_attrs = self._load_n5_attrs(key) + n5_attrs.update(**group_metadata_to_n5(json.loads(value))) + + value = json_dumps(n5_attrs).encode('ascii') + + elif key.endswith(zarr_array_meta_key): + + key = key.replace(zarr_array_meta_key, n5_attrs_key) + + value = ensure_str(value) + n5_attrs = self._load_n5_attrs(key) + n5_attrs.update(**array_metadata_to_n5(json.loads(value))) + + value = json_dumps(n5_attrs).encode('ascii') + + elif key.endswith(zarr_attrs_key): + + key = key.replace(zarr_attrs_key, n5_attrs_key) + + value = ensure_str(value) + n5_attrs = self._load_n5_attrs(key) + zarr_attrs = json.loads(value) + + for k in n5_keywords: + if k in zarr_attrs.keys(): + raise ValueError("Can not set attribute %s, this is a reserved N5 keyword" % k) + + # replace previous user attributes + for k in list(n5_attrs.keys()): + if k not in n5_keywords: + del n5_attrs[k] + + # add new user attributes + n5_attrs.update(**zarr_attrs) + + value = 
+
+        elif is_chunk_key(key):
+
+            key = invert_chunk_coords(key)
+
+        super(N5Store, self).__setitem__(key, value)
+
+    def __delitem__(self, key):
+
+        if key.endswith(zarr_group_meta_key):  # pragma: no cover
+            key = key.replace(zarr_group_meta_key, n5_attrs_key)
+        elif key.endswith(zarr_array_meta_key):  # pragma: no cover
+            key = key.replace(zarr_array_meta_key, n5_attrs_key)
+        elif key.endswith(zarr_attrs_key):  # pragma: no cover
+            key = key.replace(zarr_attrs_key, n5_attrs_key)
+        elif is_chunk_key(key):
+            key = invert_chunk_coords(key)
+
+        super(N5Store, self).__delitem__(key)
+
+    def __contains__(self, key):
+
+        if key.endswith(zarr_group_meta_key):
+
+            key = key.replace(zarr_group_meta_key, n5_attrs_key)
+            if key not in self:
+                return False
+            # group if not a dataset (attributes do not contain 'dimensions')
+            return 'dimensions' not in self._load_n5_attrs(key)
+
+        elif key.endswith(zarr_array_meta_key):
+
+            key = key.replace(zarr_array_meta_key, n5_attrs_key)
+            # array if attributes contain 'dimensions'
+            return 'dimensions' in self._load_n5_attrs(key)
+
+        elif key.endswith(zarr_attrs_key):  # pragma: no cover
+
+            key = key.replace(zarr_attrs_key, n5_attrs_key)
+            return self._contains_attrs(key)
+
+        elif is_chunk_key(key):
+
+            key = invert_chunk_coords(key)
+
+        return super(N5Store, self).__contains__(key)
+
+    def __eq__(self, other):
+        return (
+            isinstance(other, N5Store) and
+            self.path == other.path
+        )
+
+    def listdir(self, path=None):
+
+        if path is not None:
+            path = invert_chunk_coords(path)
+
+        # We can't use NestedDirectoryStore's listdir, as it requires
+        # array_meta_key to be present in array directories, which this store
+        # doesn't provide.
+        children = super(NestedDirectoryStore, self).listdir(path=path)
+
+        if self._is_array(path):
+
+            # replace n5 attribute file with respective zarr attribute files
+            children.remove(n5_attrs_key)
+            children.append(zarr_array_meta_key)
+            if self._contains_attrs(path):
+                children.append(zarr_attrs_key)
+
+            # special handling of directories containing an array to map
+            # inverted nested chunk keys back to standard chunk keys
+            new_children = []
+            root_path = self.dir_path(path)
+            for entry in children:
+                entry_path = os.path.join(root_path, entry)
+                if _prog_number.match(entry) and os.path.isdir(entry_path):
+                    for dir_path, _, file_names in os.walk(entry_path):
+                        for file_name in file_names:
+                            file_path = os.path.join(dir_path, file_name)
+                            rel_path = file_path.split(root_path + os.path.sep)[1]
+                            new_child = rel_path.replace(os.path.sep, '.')
+                            new_children.append(invert_chunk_coords(new_child))
+                else:
+                    new_children.append(entry)
+
+            return sorted(new_children)
+
+        elif self._is_group(path):
+
+            # replace n5 attribute file with respective zarr attribute files
+            children.remove(n5_attrs_key)
+            children.append(zarr_group_meta_key)
+            if self._contains_attrs(path):  # pragma: no cover
+                children.append(zarr_attrs_key)
+
+            return sorted(children)
+
+        else:
+
+            return children
+
+    def _load_n5_attrs(self, path):
+        try:
+            s = super(N5Store, self).__getitem__(path)
+            s = ensure_str(s)
+            return json.loads(s)
+        except KeyError:
+            return {}
+
+    def _is_group(self, path):
+
+        if path is None:
+            attrs_key = n5_attrs_key
+        else:
+            attrs_key = os.path.join(path, n5_attrs_key)
+
+        n5_attrs = self._load_n5_attrs(attrs_key)
+        return len(n5_attrs) > 0 and 'dimensions' not in n5_attrs
+
+    def _is_array(self, path):
+
+        if path is None:
+            attrs_key = n5_attrs_key
+        else:
+            attrs_key = os.path.join(path, n5_attrs_key)
+
+        return 'dimensions' in self._load_n5_attrs(attrs_key)
+
+    def _contains_attrs(self, path):
+
+        if path is None:
+            attrs_key = n5_attrs_key
+        else:
+            if not path.endswith(n5_attrs_key):
+                attrs_key = os.path.join(path, n5_attrs_key)
+            else:  # pragma: no cover
+                attrs_key = path
+
+        attrs = attrs_to_zarr(self._load_n5_attrs(attrs_key))
+        return len(attrs) > 0
+
+
+def is_chunk_key(key):
+    segments = list(key.split('/'))
+    if segments:
+        last_segment = segments[-1]
+        return _prog_ckey.match(last_segment)
+    return False  # pragma: no cover
+
+
+def invert_chunk_coords(key):
+    segments = list(key.split('/'))
+    if segments:
+        last_segment = segments[-1]
+        if _prog_ckey.match(last_segment):
+            coords = list(last_segment.split('.'))
+            last_segment = '.'.join(coords[::-1])
+            segments = segments[:-1] + [last_segment]
+            key = '/'.join(segments)
+    return key
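+
+
+# For example, invert_chunk_coords('foo/bar/0.1.2') returns 'foo/bar/2.1.0'
+# (illustrative key): N5 indexes chunk coordinates in reversed order relative
+# to zarr.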
+
+
+def group_metadata_to_n5(group_metadata):
+    '''Convert group metadata from zarr to N5 format.'''
+    del group_metadata['zarr_format']
+    group_metadata['n5'] = '2.0.0'
+    return group_metadata
+
+
+def group_metadata_to_zarr(group_metadata):
+    '''Convert group metadata from N5 to zarr format.'''
+    del group_metadata['n5']
+    group_metadata['zarr_format'] = ZARR_FORMAT
+    return group_metadata
+
+
+def array_metadata_to_n5(array_metadata):
+    '''Convert array metadata from zarr to N5 format.'''
+
+    for f, t in zarr_to_n5_keys:
+        array_metadata[t] = array_metadata[f]
+        del array_metadata[f]
+    del array_metadata['zarr_format']
+
+    try:
+        dtype = np.dtype(array_metadata['dataType'])
+    except TypeError:  # pragma: no cover
+        raise TypeError(
+            "data type %s not supported by N5" % array_metadata['dataType'])
+
+    array_metadata['dataType'] = dtype.name
+    array_metadata['dimensions'] = array_metadata['dimensions'][::-1]
+    array_metadata['blockSize'] = array_metadata['blockSize'][::-1]
+
+    if 'fill_value' in array_metadata:
+        if array_metadata['fill_value'] != 0 and array_metadata['fill_value'] is not None:
+            raise ValueError("N5 only supports fill_value == 0 (for now)")
+        del array_metadata['fill_value']
+
+    if 'order' in array_metadata:
+        if array_metadata['order'] != 'C':
+            raise ValueError("zarr N5 storage only stores arrays in C order (for now)")
+        del array_metadata['order']
+
+    if 'filters' in array_metadata:
+        if array_metadata['filters'] != [] and array_metadata['filters'] is not None:
+            raise ValueError("N5 storage does not support zarr filters")
+        del array_metadata['filters']
+
+    assert 'compression' in array_metadata
+    compressor_config = array_metadata['compression']
+    compressor_config = compressor_config_to_n5(compressor_config)
+    array_metadata['compression'] = compressor_config
+
+    return array_metadata
+
+
+def array_metadata_to_zarr(array_metadata):
+    '''Convert array metadata from N5 to zarr format.'''
+    for t, f in zarr_to_n5_keys:
+        array_metadata[t] = array_metadata[f]
+        del array_metadata[f]
+    array_metadata['zarr_format'] = ZARR_FORMAT
+
+    array_metadata['shape'] = array_metadata['shape'][::-1]
+    array_metadata['chunks'] = array_metadata['chunks'][::-1]
+    array_metadata['fill_value'] = 0  # also if None was requested
+    array_metadata['order'] = 'C'
+    array_metadata['filters'] = []
+
+    compressor_config = array_metadata['compressor']
+    compressor_config = compressor_config_to_zarr(compressor_config)
+    array_metadata['compressor'] = {
+        'id': N5ChunkWrapper.codec_id,
+        'compressor_config': compressor_config,
+        'dtype': array_metadata['dtype'],
+        'chunk_shape': array_metadata['chunks']
+    }
+
+    return array_metadata
+
+
+def attrs_to_zarr(attrs):
+    '''Get all zarr attributes from an N5 attributes dictionary (i.e.,
+    all non-keyword attributes).'''
+
+    # remove all N5 keywords
+    for n5_key in n5_keywords:
+        if n5_key in attrs:
+            del attrs[n5_key]
+
+    return attrs
+
+
+def compressor_config_to_n5(compressor_config):
+
+    if compressor_config is None:
+        return {'type': 'raw'}
+
+    # peel wrapper, if present
+    if compressor_config['id'] == N5ChunkWrapper.codec_id:
+        compressor_config = compressor_config['compressor_config']
+
+    codec_id = compressor_config['id']
+    n5_config = {'type': codec_id}
+
+    if codec_id == 'bz2':
+
+        n5_config['type'] = 'bzip2'
+        n5_config['blockSize'] = compressor_config['level']
+
+    elif codec_id == 'blosc':
+
+        warnings.warn(
+            "Not all N5 implementations support blosc compression (yet). You "
+            "might not be able to open the dataset with another N5 library.",
+            RuntimeWarning
+        )
+
+        n5_config['codec'] = compressor_config['cname']
+        n5_config['level'] = compressor_config['clevel']
+        n5_config['shuffle'] = compressor_config['shuffle']
+        assert compressor_config['blocksize'] == 0, \
+            "blosc block size needs to be 0 for N5 containers."
+
+    elif codec_id == 'lzma':
+
+        # Switch to XZ for N5 if we are using the default XZ format.
+        # Note: 4 is the default, which is lzma.CHECK_CRC64.
+        if compressor_config['format'] == 1 and compressor_config['check'] in [-1, 4]:
+            n5_config['type'] = 'xz'
+        else:
+            warnings.warn(
+                "Not all N5 implementations support lzma compression (yet). You "
+                "might not be able to open the dataset with another N5 library.",
+                RuntimeWarning
+            )
+            n5_config['format'] = compressor_config['format']
+            n5_config['check'] = compressor_config['check']
+            n5_config['filters'] = compressor_config['filters']
+
+        # The default is lzma.PRESET_DEFAULT, which is 6.
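+        # A falsy preset (e.g. None) is treated as unset and mapped to that
+        # default, so the N5 metadata always carries an explicit value.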
+        if compressor_config['preset']:
+            n5_config['preset'] = compressor_config['preset']
+        else:
+            n5_config['preset'] = 6
+
+    elif codec_id == 'zlib':
+
+        n5_config['type'] = 'gzip'
+        n5_config['level'] = compressor_config['level']
+        n5_config['useZlib'] = True
+
+    elif codec_id == 'gzip':  # pragma: no cover
+
+        n5_config['type'] = 'gzip'
+        n5_config['level'] = compressor_config['level']
+        n5_config['useZlib'] = False
+
+    else:  # pragma: no cover
+
+        raise RuntimeError("Unknown compressor with id %s" % codec_id)
+
+    return n5_config
+
+
+def compressor_config_to_zarr(compressor_config):
+
+    codec_id = compressor_config['type']
+    zarr_config = {'id': codec_id}
+
+    if codec_id == 'bzip2':
+
+        zarr_config['id'] = 'bz2'
+        zarr_config['level'] = compressor_config['blockSize']
+
+    elif codec_id == 'blosc':
+
+        zarr_config['cname'] = compressor_config['codec']
+        zarr_config['clevel'] = compressor_config['level']
+        zarr_config['shuffle'] = compressor_config['shuffle']
+        zarr_config['blocksize'] = 0
+
+    elif codec_id == 'lzma':
+
+        zarr_config['format'] = compressor_config['format']
+        zarr_config['check'] = compressor_config['check']
+        zarr_config['preset'] = compressor_config['preset']
+        zarr_config['filters'] = compressor_config['filters']
+
+    elif codec_id == 'xz':
+
+        zarr_config['id'] = 'lzma'
+        zarr_config['format'] = 1  # lzma.FORMAT_XZ
+        zarr_config['check'] = -1
+        zarr_config['preset'] = compressor_config['preset']
+        zarr_config['filters'] = None
+
+    elif codec_id == 'gzip':
+
+        if 'useZlib' in compressor_config and compressor_config['useZlib']:
+            zarr_config['id'] = 'zlib'
+            zarr_config['level'] = compressor_config['level']
+        else:  # pragma: no cover
+            zarr_config['id'] = 'gzip'
+            zarr_config['level'] = compressor_config['level']
+
+    elif codec_id == 'raw':
+
+        return None
+
+    else:  # pragma: no cover
+
+        raise RuntimeError("Unknown compressor with id %s" % codec_id)
+
+    return zarr_config
+
+
+class N5ChunkWrapper(Codec):
+
+    codec_id = 'n5_wrapper'
+
+    def __init__(self, dtype, chunk_shape, compressor_config=None, compressor=None):
+
+        self.dtype = np.dtype(dtype)
+        self.chunk_shape = tuple(chunk_shape)
+        # is the dtype a little endian format?
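+        # (N5 serializes chunk data in big-endian byte order, so we need to
+        # know whether encode/decode must byteswap.)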
+        self._little_endian = (
+            self.dtype.byteorder == '<' or
+            (self.dtype.byteorder == '=' and sys.byteorder == 'little')
+        )
+
+        if compressor:  # pragma: no cover
+            if compressor_config is not None:
+                raise ValueError("Only one of compressor_config or compressor should be given.")
+            compressor_config = compressor.get_config()
+
+        if (
+                compressor_config is None and compressor is None or
+                compressor_config['id'] == 'raw'):
+            self.compressor_config = None
+            self._compressor = None
+        else:
+            self._compressor = get_codec(compressor_config)
+            self.compressor_config = self._compressor.get_config()
+
+    def get_config(self):
+        config = {
+            'id': self.codec_id,
+            'compressor_config': self.compressor_config
+        }
+        return config
+
+    def encode(self, chunk):
+
+        assert chunk.flags.c_contiguous
+
+        header = self._create_header(chunk)
+        chunk = self._to_big_endian(chunk)
+
+        if self._compressor:
+            return header + self._compressor.encode(chunk)
+        else:
+            return header + chunk.tobytes(order='A')
+
+    def decode(self, chunk, out=None):
+
+        len_header, chunk_shape = self._read_header(chunk)
+        chunk = chunk[len_header:]
+
+        if out is not None:
+
+            # out should only be used if we read a complete chunk
+            assert chunk_shape == self.chunk_shape, (
+                "Expected chunk of shape %s, found %s" % (
+                    self.chunk_shape,
+                    chunk_shape))
+
+            if self._compressor:
+                self._compressor.decode(chunk, out)
+            else:
+                ndarray_copy(chunk, out)
+
+            # we can byteswap in-place
+            if self._little_endian:
+                out.byteswap(True)
+
+            return out
+
+        else:
+
+            if self._compressor:
+                chunk = self._compressor.decode(chunk)
+
+            # more expensive byteswap
+            chunk = self._from_big_endian(chunk)
+
+            # read partial chunk
+            if chunk_shape != self.chunk_shape:  # pragma: no cover
+                chunk = np.frombuffer(chunk, dtype=self.dtype)
+                chunk = chunk.reshape(chunk_shape)
+                complete_chunk = np.zeros(self.chunk_shape, dtype=self.dtype)
+                target_slices = tuple(slice(0, s) for s in chunk_shape)
+                complete_chunk[target_slices] = chunk
+                chunk = complete_chunk
+
+            return chunk
+
+    def _create_header(self, chunk):
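+        # Chunk header layout (mode 0, all fields big-endian):
+        #   uint16 mode | uint16 num_dims | uint32 size per dimension,
+        # with dimension sizes written in reversed order.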
+
+        mode = struct.pack('>H', 0)
+        num_dims = struct.pack('>H', len(chunk.shape))
+        shape = b''.join(
+            struct.pack('>I', d)
+            for d in chunk.shape[::-1]
+        )
+
+        return mode + num_dims + shape
+
+    def _read_header(self, chunk):
+
+        num_dims = struct.unpack('>H', chunk[2:4])[0]
+        shape = tuple(
+            struct.unpack('>I', chunk[i:i+4])[0]
+            for i in range(4, num_dims*4 + 4, 4)
+        )[::-1]
+
+        len_header = 4 + num_dims*4
+
+        return len_header, shape
+
+    def _to_big_endian(self, data):
+        # assumes data is ndarray
+
+        if self._little_endian:
+            return data.byteswap()
+        return data
+
+    def _from_big_endian(self, data):
+        # assumes data is byte array in big endian
+
+        if not self._little_endian:
+            return data
+
+        a = np.frombuffer(data, self.dtype.newbyteorder('>'))
+        return a.astype(self.dtype)
+
+
+register_codec(N5ChunkWrapper, N5ChunkWrapper.codec_id)
diff --git a/zarr/tests/test_core.py b/zarr/tests/test_core.py
index 3a7a092033..1c8d5222f3 100644
--- a/zarr/tests/test_core.py
+++ b/zarr/tests/test_core.py
@@ -23,6 +23,7 @@ from zarr.compat import PY2, text_type, binary_type, zip_longest
 from zarr.meta import ensure_str
 from zarr.util import buffer_size
+from zarr.n5 import n5_keywords, N5Store
 from numcodecs import (Delta, FixedScaleOffset, LZ4, GZip, Zlib, Blosc, BZ2, MsgPack,
                        Pickle, Categorize, JSON, VLenUTF8, VLenBytes, VLenArray)
 from numcodecs.compat import ensure_bytes, ensure_ndarray
@@ -1413,6 +1414,257 @@ def create_array(read_only=False, **kwargs):
                          cache_attrs=cache_attrs)
 
 
+class TestArrayWithN5Store(TestArrayWithDirectoryStore):
+
+    @staticmethod
+    def create_array(read_only=False, **kwargs):
+        path = mkdtemp()
+        atexit.register(shutil.rmtree, path)
+        store = N5Store(path)
+        cache_metadata = kwargs.pop('cache_metadata', True)
+        cache_attrs = kwargs.pop('cache_attrs', True)
+        kwargs.setdefault('compressor', Zlib(1))
+        init_array(store, **kwargs)
+        return Array(store, read_only=read_only, cache_metadata=cache_metadata,
+                     cache_attrs=cache_attrs)
+
+    def test_array_0d(self):
+        # test behaviour for array with 0 dimensions
+
+        # setup
+        a = np.zeros(())
+        z = self.create_array(shape=(), dtype=a.dtype, fill_value=0)
+
+        # check properties
+        assert a.ndim == z.ndim
+        assert a.shape == z.shape
+        assert a.size == z.size
+        assert a.dtype == z.dtype
+        assert a.nbytes == z.nbytes
+        with pytest.raises(TypeError):
+            len(z)
+        assert () == z.chunks
+        assert 1 == z.nchunks
+        assert (1,) == z.cdata_shape
+        # compressor always None - no point in compressing a single value
+        assert z.compressor.compressor_config is None
+
+        # check __getitem__
+        b = z[...]
+        assert isinstance(b, np.ndarray)
+        assert a.shape == b.shape
+        assert a.dtype == b.dtype
+        assert_array_equal(a, np.array(z))
+        assert_array_equal(a, z[...])
+        assert a[()] == z[()]
+        with pytest.raises(IndexError):
+            z[0]
+        with pytest.raises(IndexError):
+            z[:]
+
+        # check __setitem__
+        z[...] = 42
+        assert 42 == z[()]
+        z[()] = 43
+        assert 43 == z[()]
+        with pytest.raises(IndexError):
+            z[0] = 42
+        with pytest.raises(IndexError):
+            z[:] = 42
+        with pytest.raises(ValueError):
+            z[...] = np.array([1, 2, 3])
+
+    def test_array_1d_fill_value(self):
+        nvalues = 1050
+        dtype = np.int32
+        for fill_value in 0, None:
+            a = np.arange(nvalues, dtype=dtype)
+            f = np.empty_like(a)
+            f.fill(fill_value or 0)
+            z = self.create_array(shape=a.shape, chunks=100, dtype=a.dtype,
+                                  fill_value=fill_value)
+            z[190:310] = a[190:310]
+
+            assert_array_equal(f[:190], z[:190])
+            assert_array_equal(a[190:310], z[190:310])
+            assert_array_equal(f[310:], z[310:])
+
+        with pytest.raises(ValueError):
+            z = self.create_array(shape=(nvalues,), chunks=100, dtype=dtype,
+                                  fill_value=1)
+
+    def test_array_order(self):
+
+        # N5 only supports 'C' at the moment
+        with pytest.raises(ValueError):
+            self.create_array(shape=(10, 11), chunks=(10, 11), dtype='i8',
+                              order='F')
+
+        # 1D
+        a = np.arange(1050)
+        z = self.create_array(shape=a.shape, chunks=100, dtype=a.dtype,
+                              order='C')
+        assert z.order == 'C'
+        assert z[:].flags.c_contiguous
+        z[:] = a
+        assert_array_equal(a, z[:])
+
+        # 2D
+        a = np.arange(10000).reshape((100, 100))
+        z = self.create_array(shape=a.shape, chunks=(10, 10),
+                              dtype=a.dtype, order='C')
+
+        assert z.order == 'C'
+        assert z[:].flags.c_contiguous
+        z[:] = a
+        actual = z[:]
+        assert_array_equal(a, actual)
+
+    def test_structured_array(self):
+        d = np.array([(b'aaa', 1, 4.2),
+                      (b'bbb', 2, 8.4),
+                      (b'ccc', 3, 12.6)],
+                     dtype=[('foo', 'S3'), ('bar', 'i4'), ('baz', 'f8')])
+        fill_values = None, b'', (b'zzz', 42, 16.8)
+        with pytest.raises(TypeError):
+            self.check_structured_array(d, fill_values)
+
+    def test_structured_array_subshapes(self):
+        d = np.array([(0, ((0, 1, 2), (1, 2, 3)), b'aaa'),
+                      (1, ((1, 2, 3), (2, 3, 4)), b'bbb'),
+                      (2, ((2, 3, 4), (3, 4, 5)), b'ccc')],
+                     dtype=[('foo', 'i8'), ('bar', '(2, 3)f4'), ('baz', 'S3')])
+        fill_values = None, b'', (0, ((0, 0, 0), (1, 1, 1)), b'zzz')
+        with pytest.raises(TypeError):
+            self.check_structured_array(d, fill_values)
+
+    def test_structured_array_nested(self):
+        d = np.array([(0, (0, ((0, 1), (1, 2), (2, 3)), 0), b'aaa'),
+                      (1, (1, ((1, 2), (2, 3), (3, 4)), 1), b'bbb'),
+                      (2, (2, ((2, 3), (3, 4), (4, 5)), 2), b'ccc')],
+                     dtype=[('foo', 'i8'), ('bar', [('foo', 'i4'), ('bar', '(3, 2)f4'),
+                                                    ('baz', 'u1')]), ('baz', 'S3')])
+        fill_values = None, b'', (0, (0, ((0, 0), (1, 1), (2, 2)), 0), b'zzz')
+        with pytest.raises(TypeError):
+            self.check_structured_array(d, fill_values)
+
+    def test_object_arrays(self):
+
+        # an object_codec is required for object arrays
+        with pytest.raises(ValueError):
+            self.create_array(shape=10, chunks=3, dtype=object)
+
+        # an object_codec is required for object arrays, but one may be
+        # provided via filters to maintain API backwards compatibility
+        with pytest.raises(ValueError):
+            with pytest.warns(FutureWarning):
+                self.create_array(shape=10, chunks=3, dtype=object, filters=[MsgPack()])
+
+        # create an object array using an object codec
+        with pytest.raises(ValueError):
+            self.create_array(shape=10, chunks=3, dtype=object, object_codec=MsgPack())
+
+    def test_object_arrays_vlen_text(self):
+
+        data = np.array(greetings * 1000, dtype=object)
+
+        with pytest.raises(ValueError):
+            self.create_array(shape=data.shape, dtype=object, object_codec=VLenUTF8())
+
+        # convenience API
+        with pytest.raises(ValueError):
+            self.create_array(shape=data.shape, dtype=text_type)
+
+    def test_object_arrays_vlen_bytes(self):
+
+        greetings_bytes = [g.encode('utf8') for g in greetings]
+        data = np.array(greetings_bytes * 1000, dtype=object)
+
+        with pytest.raises(ValueError):
+            self.create_array(shape=data.shape, dtype=object, object_codec=VLenBytes())
+
+        # convenience API
+        with pytest.raises(ValueError):
+            self.create_array(shape=data.shape, dtype=binary_type)
+
+    def test_object_arrays_vlen_array(self):
+
+        data = np.array([np.array([1, 3, 7]),
+                         np.array([5]),
+                         np.array([2, 8, 12])] * 1000, dtype=object)
+
+        codecs = VLenArray(int), VLenArray('