Skip to content

Commit

Permalink
added docstring
Browse files Browse the repository at this point in the history
  • Loading branch information
jmunroe committed Jul 27, 2017
1 parent 6f7369a commit 104ab49
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 14 deletions.
22 changes: 10 additions & 12 deletions xarray/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -2387,30 +2387,28 @@ def from_dataframe(cls, dataframe):
return obj

def to_dask_dataframe(self, set_index=True):
"""Convert this dataset into a dask.dataframe.DataFrame.
"""
Convert this dataset into a dask.dataframe.DataFrame.
Non-index variables in this dataset form the columns of the
DataFrame.
If set_index=True, the dask DataFrame is indexed by
this dataset's coordinate. Since dask DataFrames do not support
multi-indexes, this only works if there is one coordinate dimension.
"""

import dask.dataframe as dd
import dask.array as da

columns = [k for k in self if k not in self.dims]

index = self.coords.to_index(self.dims)

lazy_data = {k: v._data for k, v in self.variables.items()
if isinstance(v._data, dask_array_type)}

data = [self._variables[k].data.reshape(-1) for k in columns]
df = dd.from_dask_array(da.stack(data, axis=1), columns=columns)

# approach 2 -- doesn't work as is
#data = [dd.from_dask_array(self._variables[k].data.reshape(-1), columns=k) for k in columns]
#df = data[0]
#for d in data[1:]:
# df = dd.merge(df, d)

if set_index:
index = self.coords.to_index(self.dims)

index = dd.from_array(index.values).repartition(divisions=df.divisions)
df = df.set_index(index, sort=False)

Expand Down
3 changes: 1 addition & 2 deletions xarray/tests/test_dask.py
Original file line number Diff line number Diff line change
Expand Up @@ -399,7 +399,7 @@ def test_to_dask_dataframe(self):
# but with dask DataFrames instead of pandas DataFrames

x = da.from_array(np.random.randn(10), chunks=4)
y = da.from_array(np.random.randn(10), chunks=4)
y = np.random.randn(10)
t = list('abcdefghij')
ds = Dataset(OrderedDict([('a', ('t', x)),
('b', ('t', y)),
Expand All @@ -417,7 +417,6 @@ def test_to_dask_dataframe(self):
# use the .equals from pandas to check dataframes are equivalent
assert expected.compute().equals(actual.compute()), (expected, actual)


kernel_call_count = 0
def kernel():
"""Dask kernel to test pickling/unpickling.
Expand Down

0 comments on commit 104ab49

Please sign in to comment.