From cc13203464398bb666f217de533211f1637eb8cc Mon Sep 17 00:00:00 2001 From: Ryan Abernathey Date: Mon, 7 Jan 2019 14:58:56 +0100 Subject: [PATCH 1/5] add data=False to to_dict methods --- xarray/core/dataarray.py | 22 ++++++++++------------ xarray/core/dataset.py | 21 +++++++++------------ xarray/core/variable.py | 11 ++++++++++- xarray/tests/test_dataarray.py | 7 +++++++ xarray/tests/test_dataset.py | 16 +++++++++++++--- 5 files changed, 49 insertions(+), 28 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index a63b63b45bf..644e82c45ae 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -1759,7 +1759,7 @@ def to_netcdf(self, *args, **kwargs): return dataset.to_netcdf(*args, **kwargs) - def to_dict(self): + def to_dict(self, data=True): """ Convert this xarray.DataArray into a dictionary following xarray naming conventions. @@ -1768,22 +1768,20 @@ def to_dict(self): Useful for coverting to json. To avoid datetime incompatibility use decode_times=False kwarg in xarrray.open_dataset. + Parameters + ---------- + data : bool, optional + Whether to include the actual data in the dictionary. When set to + False, returns just the schema. 
+ See also -------- DataArray.from_dict """ - d = {'coords': {}, 'attrs': decode_numpy_dict_values(self.attrs), - 'dims': self.dims} - + d = self.variable.to_dict(data=data) + d.update({'coords': {}, 'name': self.name}) for k in self.coords: - data = ensure_us_time_resolution(self[k].values).tolist() - d['coords'].update({ - k: {'data': data, - 'dims': self[k].dims, - 'attrs': decode_numpy_dict_values(self[k].attrs)}}) - - d.update({'data': ensure_us_time_resolution(self.values).tolist(), - 'name': self.name}) + d['coords'].update({k: self.coords[k].variable.to_dict(data=data)}) return d @classmethod diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 29178c9b13c..bc39e2f0820 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -3218,7 +3218,7 @@ def to_dask_dataframe(self, dim_order=None, set_index=False): return df - def to_dict(self): + def to_dict(self, data=True): """ Convert this dataset to a dictionary following xarray naming conventions. @@ -3227,25 +3227,22 @@ def to_dict(self): Useful for coverting to json. To avoid datetime incompatibility use decode_times=False kwarg in xarrray.open_dataset. + Parameters + ---------- + data : bool, optional + Whether to include the actual data in the dictionary. When set to + False, returns just the schema. 
+ See also -------- Dataset.from_dict """ d = {'coords': {}, 'attrs': decode_numpy_dict_values(self.attrs), 'dims': dict(self.dims), 'data_vars': {}} - for k in self.coords: - data = ensure_us_time_resolution(self[k].values).tolist() - d['coords'].update({ - k: {'data': data, - 'dims': self[k].dims, - 'attrs': decode_numpy_dict_values(self[k].attrs)}}) + d['coords'].update({k: self[k].variable.to_dict(data=data)}) for k in self.data_vars: - data = ensure_us_time_resolution(self[k].values).tolist() - d['data_vars'].update({ - k: {'data': data, - 'dims': self[k].dims, - 'attrs': decode_numpy_dict_values(self[k].attrs)}}) + d['data_vars'].update({k: self[k].variable.to_dict(data=data)}) return d @classmethod diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 48acc8edff9..e97aa6d67fb 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -18,7 +18,8 @@ from .options import _get_keep_attrs from .pycompat import ( OrderedDict, basestring, dask_array_type, integer_types, zip) -from .utils import OrderedSet, either_dict_or_kwargs +from .utils import (OrderedSet, either_dict_or_kwargs, decode_numpy_dict_values, + ensure_us_time_resolution) try: import dask.array as da @@ -408,6 +409,14 @@ def to_index(self): """Convert this variable to a pandas.Index""" return self.to_index_variable().to_index() + def to_dict(self, data=True): + """Dictionary representation of variable.""" + item = {'dims': self.dims, + 'attrs': decode_numpy_dict_values(self.attrs)} + if data: + item['data'] = ensure_us_time_resolution(self.values).tolist() + return item + @property def dims(self): """Tuple of dimension names with which this variable is associated. 
diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index aa02e802fc5..7019ddca541 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -2909,6 +2909,13 @@ def test_to_and_from_dict(self): ValueError, "cannot convert dict without the key 'data'"): DataArray.from_dict(d) + # check the data=False option + expected_no_data = {**expected} + del expected_no_data['data'] + del expected_no_data['coords']['x']['data'] + actual_no_data = array.to_dict(data=False) + assert expected_no_data == actual_no_data + def test_to_and_from_dict_with_time_dim(self): x = np.random.randn(10, 3) t = pd.date_range('20130101', periods=10) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index e7e091efa4c..4fe683c3bd7 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -3045,11 +3045,20 @@ def test_to_and_from_dict(self): # check roundtrip assert_identical(ds, Dataset.from_dict(actual)) + # check the data=False option + expected_no_data = {**expected} + print(expected_no_data) + del expected_no_data['coords']['t']['data'] + del expected_no_data['data_vars']['a']['data'] + del expected_no_data['data_vars']['b']['data'] + actual_no_data = ds.to_dict(data=False) + assert expected_no_data == actual_no_data + # verify coords are included roundtrip - expected = ds.set_coords('b') - actual = Dataset.from_dict(expected.to_dict()) + expected_ds = ds.set_coords('b') + actual = Dataset.from_dict(expected_ds.to_dict()) - assert_identical(expected, actual) + assert_identical(expected_ds, actual) # test some incomplete dicts: # this one has no attrs field, the dims are strings, and x, y are @@ -3075,6 +3084,7 @@ def test_to_and_from_dict(self): "without the key 'dims'"): Dataset.from_dict(d) + def test_to_and_from_dict_with_time_dim(self): x = np.random.randn(10, 3) y = np.random.randn(10, 3) From 1e65cc24e349edfcc32deaad246d736406ff6d7d Mon Sep 17 00:00:00 2001 From: Ryan Abernathey Date: Mon, 7 
Jan 2019 15:05:28 +0100 Subject: [PATCH 2/5] doc and whats-new --- doc/io.rst | 12 +++++++++++- doc/whats-new.rst | 2 ++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/doc/io.rst b/doc/io.rst index 151f5eb740f..0dc5181f9b8 100644 --- a/doc/io.rst +++ b/doc/io.rst @@ -81,6 +81,16 @@ require external libraries and dicts can easily be pickled, or converted to json, or geojson. All the values are converted to lists, so dicts might be quite large. +To export just the dataset schema, without the data itself, use the +``data=False`` option: + +.. ipython:: python + + ds.to_dict(data=False) + +This can be useful for generating indices of dataset contents to expose to +search indices or other automated data discovery tools. + .. _io.netcdf: netCDF @@ -665,7 +675,7 @@ To read a consolidated store, pass the ``consolidated=True`` option to :py:func:`~xarray.open_zarr`:: ds = xr.open_zarr('foo.zarr', consolidated=True) - + Xarray can't perform consolidation on pre-existing zarr datasets. This should be done directly from zarr, as described in the `zarr docs <https://zarr.readthedocs.io/en/latest/tutorial.html#consolidating-metadata>`_. diff --git a/doc/whats-new.rst b/doc/whats-new.rst index b50df2af10e..23929d5a305 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -28,6 +28,8 @@ Breaking changes Enhancements ~~~~~~~~~~~~ +- Add ``data=False`` option to ``to_dict()`` methods. (:issue:`2656`) + By `Ryan Abernathey <https://github.com/rabernat>`_ - :py:meth:`~xarray.DataArray.coarsen` and :py:meth:`~xarray.Dataset.coarsen` are newly added. See :ref:`comput.coarsen` for details. 
From 7616ae377b10c6b77be7406800fbfb98eefe0c09 Mon Sep 17 00:00:00 2001 From: Ryan Abernathey Date: Mon, 7 Jan 2019 15:11:47 +0100 Subject: [PATCH 3/5] fix pep8 errors --- xarray/core/variable.py | 4 ++-- xarray/tests/test_dataset.py | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index e97aa6d67fb..85560be0853 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -18,8 +18,8 @@ from .options import _get_keep_attrs from .pycompat import ( OrderedDict, basestring, dask_array_type, integer_types, zip) -from .utils import (OrderedSet, either_dict_or_kwargs, decode_numpy_dict_values, - ensure_us_time_resolution) +from .utils import (OrderedSet, either_dict_or_kwargs, + decode_numpy_dict_values, ensure_us_time_resolution) try: import dask.array as da diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 4fe683c3bd7..844b22d9b26 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -3084,7 +3084,6 @@ def test_to_and_from_dict(self): "without the key 'dims'"): Dataset.from_dict(d) - def test_to_and_from_dict_with_time_dim(self): x = np.random.randn(10, 3) y = np.random.randn(10, 3) From 4551e241dd5edcfd31f7e0f2b7641e69589b9208 Mon Sep 17 00:00:00 2001 From: Ryan Abernathey Date: Mon, 7 Jan 2019 18:07:04 +0100 Subject: [PATCH 4/5] small tweaks --- xarray/core/dataarray.py | 2 +- xarray/tests/test_dataarray.py | 2 +- xarray/tests/test_dataset.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 644e82c45ae..1cf13b38162 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -1781,7 +1781,7 @@ def to_dict(self, data=True): d = self.variable.to_dict(data=data) d.update({'coords': {}, 'name': self.name}) for k in self.coords: - d['coords'].update({k: self.coords[k].variable.to_dict(data=data)}) + d['coords'][k] = 
self.coords[k].variable.to_dict(data=data) return d @classmethod diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 7019ddca541..42fd982193d 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -2910,7 +2910,7 @@ def test_to_and_from_dict(self): DataArray.from_dict(d) # check the data=False option - expected_no_data = {**expected} + expected_no_data = expected.copy() del expected_no_data['data'] del expected_no_data['coords']['x']['data'] actual_no_data = array.to_dict(data=False) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 844b22d9b26..6988a547464 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -3046,7 +3046,7 @@ def test_to_and_from_dict(self): assert_identical(ds, Dataset.from_dict(actual)) # check the data=False option - expected_no_data = {**expected} + expected_no_data = expected.copy() print(expected_no_data) del expected_no_data['coords']['t']['data'] del expected_no_data['data_vars']['a']['data'] From 4cf7bc8efe9fe6aae4c2487685c883b70aefa9dd Mon Sep 17 00:00:00 2001 From: Ryan Abernathey Date: Tue, 8 Jan 2019 09:42:48 +0100 Subject: [PATCH 5/5] added shape and dtype --- xarray/core/variable.py | 2 ++ xarray/tests/test_dataarray.py | 2 ++ xarray/tests/test_dataset.py | 7 ++++++- 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 85560be0853..8e083bba83c 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -415,6 +415,8 @@ def to_dict(self, data=True): 'attrs': decode_numpy_dict_values(self.attrs)} if data: item['data'] = ensure_us_time_resolution(self.values).tolist() + else: + item.update({'dtype': str(self.dtype), 'shape': self.shape}) return item @property diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 42fd982193d..8995fca2f95 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ 
-2913,6 +2913,8 @@ def test_to_and_from_dict(self): expected_no_data = expected.copy() del expected_no_data['data'] del expected_no_data['coords']['x']['data'] + expected_no_data['coords']['x'].update({'dtype': '