Skip to content

Commit

Permalink
added docstring
Browse files Browse the repository at this point in the history
  • Loading branch information
jmunroe committed Jul 27, 2017
1 parent 6f7369a commit 104ab49
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 14 deletions.
22 changes: 10 additions & 12 deletions xarray/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -2387,30 +2387,28 @@ def from_dataframe(cls, dataframe):
return obj

def to_dask_dataframe(self, set_index=True):
"""Convert this dataset into a dask.dataframe.DataFrame.
"""
Convert this dataset into a dask.dataframe.DataFrame.
Non-index variables in this dataset form the columns of the
DataFrame.
If set_index=True, the dask DataFrame is indexed by
this dataset's coordinate. Since dask DataFrames do not support
multi-indexes, this only works if there is one coordinate dimension.
"""

import dask.dataframe as dd
import dask.array as da

columns = [k for k in self if k not in self.dims]

index = self.coords.to_index(self.dims)

lazy_data = {k: v._data for k, v in self.variables.items()
if isinstance(v._data, dask_array_type)}

data = [self._variables[k].data.reshape(-1) for k in columns]
df = dd.from_dask_array(da.stack(data, axis=1), columns=columns)

# approach 2 -- doesn't work as is
#data = [dd.from_dask_array(self._variables[k].data.reshape(-1), columns=k) for k in columns]
#df = data[0]
#for d in data[1:]:
# df = dd.merge(df, d)

if set_index:
index = self.coords.to_index(self.dims)

index = dd.from_array(index.values).repartition(divisions=df.divisions)
df = df.set_index(index, sort=False)

Expand Down
3 changes: 1 addition & 2 deletions xarray/tests/test_dask.py
Original file line number Diff line number Diff line change
Expand Up @@ -399,7 +399,7 @@ def test_to_dask_dataframe(self):
# but with dask DataFrames instead of pandas DataFrames

x = da.from_array(np.random.randn(10), chunks=4)
y = da.from_array(np.random.randn(10), chunks=4)
y = np.random.randn(10)
t = list('abcdefghij')
ds = Dataset(OrderedDict([('a', ('t', x)),
('b', ('t', y)),
Expand All @@ -417,7 +417,6 @@ def test_to_dask_dataframe(self):
# use the .equals from pandas to check dataframes are equivalent
assert expected.compute().equals(actual.compute()), (expected, actual)


kernel_call_count = 0
def kernel():
"""Dask kernel to test pickling/unpickling.
Expand Down

0 comments on commit 104ab49

Please sign in to comment.