Skip to content

Commit

Permalink
fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
shoyer committed Sep 29, 2016
1 parent 7901c0c commit 510997a
Show file tree
Hide file tree
Showing 10 changed files with 60 additions and 80 deletions.
25 changes: 0 additions & 25 deletions xarray/backends/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,25 +30,6 @@ def _decode_variable_name(name):
return name


def is_trivial_index(var):
    """Check whether a variable is a 'trivial' index.

    A trivial index is one equivalent to ``np.arange(var.size)``: it
    carries no attributes or encoding, is one-dimensional with an
    integer dtype, and its values match ``np.arange`` exactly.
    """
    # Any attributes or encoding make the index non-trivial.
    if var.attrs or var.encoding:
        return False
    # Must be a 1d integer array to possibly match np.arange.
    if var.ndim > 1 or var.dtype.kind != 'i':
        return False
    expected = np.arange(var.size, dtype=var.dtype)
    return np.all(var.values == expected)


def robust_getitem(array, key, catch=Exception, max_retries=6,
initial_delay=500):
"""
Expand Down Expand Up @@ -200,12 +181,6 @@ def store_dataset(self, dataset):

def store(self, variables, attributes, check_encoding_set=frozenset()):
self.set_attributes(attributes)
neccesary_dims = [v.dims for v in variables.values()]
neccesary_dims = set(itertools.chain(*neccesary_dims))
# set all non-indexes and any index which is not trivial.
variables = OrderedDict((k, v) for k, v in iteritems(variables)
if not (k in neccesary_dims and
is_trivial_index(v)))
self.set_variables(variables, check_encoding_set)

def set_attributes(self, attributes):
Expand Down
2 changes: 1 addition & 1 deletion xarray/conventions.py
Original file line number Diff line number Diff line change
Expand Up @@ -910,7 +910,7 @@ def decode_cf(obj, concat_characters=True, mask_and_scale=True,
identify coordinates.
drop_variables: string or iterable, optional
A variable or list of variables to exclude from being parsed from the
dataset.This may be useful to drop variables with problems or
dataset. This may be useful to drop variables with problems or
inconsistent values.
Returns
Expand Down
25 changes: 13 additions & 12 deletions xarray/core/alignment.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from . import ops, utils
from .common import _maybe_promote
from .indexing import get_indexer
from .pycompat import iteritems, OrderedDict
from .pycompat import iteritems, OrderedDict, suppress
from .utils import is_full_slice, is_dict_like
from .variable import Variable, IndexVariable

Expand Down Expand Up @@ -112,16 +112,16 @@ def align(*objects, **kwargs):
# pandas). This is useful, e.g., for overwriting such duplicate indexes.
joiner = _get_joiner(join)
joined_indexes = {}
for name, matching_indexes in iteritems(all_indexes):
if name in indexes:
index = utils.safe_cast_to_index(indexes[name])
for dim, matching_indexes in iteritems(all_indexes):
if dim in indexes:
index = utils.safe_cast_to_index(indexes[dim])
if any(not index.equals(other) for other in matching_indexes):
joined_indexes[name] = index
joined_indexes[dim] = index
else:
if any(not matching_indexes[0].equals(other)
for other in matching_indexes[1:]):
index = joiner(matching_indexes)
joined_indexes[name] = index
joined_indexes[dim] = index
else:
index = matching_indexes[0]

Expand Down Expand Up @@ -384,18 +384,17 @@ def broadcast(*args, **kwargs):
for arg in args:
for dim in arg.dims:
if dim not in common_coords and dim not in exclude:
common_coords[dim] = arg.coords[dim].variable
dims_map[dim] = common_coords[dim].size
dims_map[dim] = get_size(arg, dim)
if dim in arg.coords:
common_coords[dim] = arg.coords[dim].variable

def _expand_dims(var):
# Add excluded dims to a copy of dims_map
var_dims_map = dims_map.copy()
for dim in exclude:
try:
with suppress(ValueError):
# ignore dim not in var.dims
var_dims_map[dim] = var.shape[var.dims.index(dim)]
except ValueError:
# dim not in var.dims
pass

return var.expand_dims(var_dims_map)

Expand All @@ -414,6 +413,8 @@ def _broadcast_dataset(ds):
coords.update(common_coords)
return Dataset(data_vars, coords, ds.attrs)

print(common_coords, dims_map)

result = []
for arg in args:
if isinstance(arg, DataArray):
Expand Down
26 changes: 11 additions & 15 deletions xarray/core/coordinates.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,25 +219,21 @@ def __delitem__(self, key):
del self._data._coords[key]


class LevelCoordinates(AbstractCoordinates):
"""Dictionary like container for MultiIndex level coordinates.
class LevelCoordinatesSource(object):
"""Iterator for MultiIndex level coordinates.
Used for attribute style lookup. Not returned directly by any
public methods.
Used for attribute style lookup with AttrAccessMixin. Not returned directly
by any public methods.
"""
def __init__(self, dataarray):
self._data = dataarray
def __init__(self, data_object):
self._data = data_object

@property
def _names(self):
return set(self._data._level_coords)
def __getitem__(self, key):
# not necessary -- everything here can already be found on coords.
raise KeyError

@property
def variables(self):
level_coords = OrderedDict(
(k, self._data[v].variable.get_level_variable(k))
for k, v in self._data._level_coords.items())
return Frozen(level_coords)
def __iter__(self):
return iter(self._data._level_coords)


class Indexes(Mapping, formatting.ReprMixin):
Expand Down
7 changes: 4 additions & 3 deletions xarray/core/dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from . import utils
from .alignment import align
from .common import AbstractArray, BaseDataObject, squeeze
from .coordinates import (DataArrayCoordinates, LevelCoordinates,
from .coordinates import (DataArrayCoordinates, LevelCoordinatesSource,
Indexes)
from .dataset import Dataset
from .pycompat import iteritems, basestring, OrderedDict, zip
Expand Down Expand Up @@ -471,7 +471,7 @@ def __delitem__(self, key):
@property
def _attr_sources(self):
"""List of places to look-up items for attribute-style access"""
return [self.coords, LevelCoordinates(self), self.attrs]
return [self.coords, LevelCoordinatesSource(self), self.attrs]

def __contains__(self, key):
return key in self._coords
Expand Down Expand Up @@ -1639,7 +1639,8 @@ def dot(self, other):
axes = (self.get_axis_num(dims), other.get_axis_num(dims))
new_data = ops.tensordot(self.data, other.data, axes=axes)

new_coords = self.coords.merge(other.coords).drop(dims)
new_coords = self.coords.merge(other.coords)
new_coords = new_coords.drop([d for d in dims if d in new_coords])
new_dims = ([d for d in self.dims if d not in dims] +
[d for d in other.dims if d not in dims])

Expand Down
4 changes: 2 additions & 2 deletions xarray/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from . import formatting
from .. import conventions
from .alignment import align
from .coordinates import DatasetCoordinates, LevelCoordinates, Indexes
from .coordinates import DatasetCoordinates, LevelCoordinatesSource, Indexes
from .common import ImplementsDatasetReduce, BaseDataObject
from .merge import (dataset_update_method, dataset_merge_method,
merge_data_and_coords)
Expand Down Expand Up @@ -499,7 +499,7 @@ def __deepcopy__(self, memo=None):
@property
def _attr_sources(self):
"""List of places to look-up items for attribute-style access"""
return [self, LevelCoordinates(self), self.attrs]
return [self, LevelCoordinatesSource(self), self.attrs]

def __contains__(self, key):
"""The 'in' operator will return true or false depending on whether
Expand Down
3 changes: 2 additions & 1 deletion xarray/test/test_backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -1088,7 +1088,8 @@ class MiscObject:
class TestValidateAttrs(TestCase):
def test_validating_attrs(self):
def new_dataset():
return Dataset({'data': ('y', np.arange(10.0))})
return Dataset({'data': ('y', np.arange(10.0))},
{'y': np.arange(10)})

def new_dataset_and_dataset_attrs():
ds = new_dataset()
Expand Down
39 changes: 21 additions & 18 deletions xarray/test/test_combine.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,19 +31,21 @@ def rectify_dim_order(dataset):
for dim in ['dim1', 'dim2']:
datasets = [g for _, g in data.groupby(dim, squeeze=False)]
self.assertDatasetIdentical(data, concat(datasets, dim))
self.assertDatasetIdentical(
data, concat(datasets, data[dim]))
self.assertDatasetIdentical(
data, concat(datasets, data[dim], coords='minimal'))

datasets = [g for _, g in data.groupby(dim, squeeze=True)]
concat_over = [k for k, v in iteritems(data.coords)
if dim in v.dims and k != dim]
actual = concat(datasets, data[dim], coords=concat_over)
self.assertDatasetIdentical(data, rectify_dim_order(actual))
dim = 'dim2'
self.assertDatasetIdentical(
data, concat(datasets, data[dim]))
self.assertDatasetIdentical(
data, concat(datasets, data[dim], coords='minimal'))

actual = concat(datasets, data[dim], coords='different')
self.assertDatasetIdentical(data, rectify_dim_order(actual))
datasets = [g for _, g in data.groupby(dim, squeeze=True)]
concat_over = [k for k, v in iteritems(data.coords)
if dim in v.dims and k != dim]
actual = concat(datasets, data[dim], coords=concat_over)
self.assertDatasetIdentical(data, rectify_dim_order(actual))

actual = concat(datasets, data[dim], coords='different')
self.assertDatasetIdentical(data, rectify_dim_order(actual))

# make sure the coords argument behaves as expected
data.coords['extra'] = ('dim4', np.arange(3))
Expand Down Expand Up @@ -111,7 +113,8 @@ def test_concat_autoalign(self):
ds2 = Dataset({'foo': DataArray([1, 2], coords=[('x', [1, 3])])})
actual = concat([ds1, ds2], 'y')
expected = Dataset({'foo': DataArray([[1, 2, np.nan], [1, np.nan, 2]],
dims=['y', 'x'], coords={'y': [0, 1], 'x': [1, 2, 3]})})
dims=['y', 'x'],
coords={'x': [1, 2, 3]})})
self.assertDatasetIdentical(expected, actual)

def test_concat_errors(self):
Expand Down Expand Up @@ -184,7 +187,7 @@ def test_concat_promote_shape(self):
objs = [Dataset({'x': [0]}, {'y': -1}),
Dataset({'x': [1, 2]}, {'y': -2})]
actual = concat(objs, 'x')
expected = Dataset({}, {'y': ('x', [-1, -2, -2])})
expected = Dataset({'x': [0, 1, 2]}, {'y': ('x', [-1, -2, -2])})
self.assertDatasetIdentical(actual, expected)

# broadcast 1d x 1d -> 2d
Expand All @@ -196,8 +199,8 @@ def test_concat_promote_shape(self):

def test_concat_do_not_promote(self):
# GH438
objs = [Dataset({'y': ('t', [1])}, {'x': 1}),
Dataset({'y': ('t', [2])}, {'x': 1})]
objs = [Dataset({'y': ('t', [1])}, {'x': 1, 't': 0}),
Dataset({'y': ('t', [2])}, {'x': 1, 't': 0})]
expected = Dataset({'y': ('t', [1, 2])}, {'x': 1, 't': [0, 0]})
actual = concat(objs, 't')
self.assertDatasetIdentical(expected, actual)
Expand Down Expand Up @@ -291,8 +294,7 @@ def test_auto_combine(self):
objs = [Dataset(OrderedDict([('x', ('a', [0])), ('y', ('a', [0]))])),
Dataset(OrderedDict([('y', ('a', [1])), ('x', ('a', [1]))]))]
actual = auto_combine(objs)
expected = Dataset(
{'x': ('a', [0, 1]), 'y': ('a', [0, 1]), 'a': [0, 0]})
expected = Dataset({'x': ('a', [0, 1]), 'y': ('a', [0, 1])})
self.assertDatasetIdentical(expected, actual)

objs = [Dataset({'x': [0], 'y': [0]}), Dataset({'y': [1], 'x': [1]})]
Expand Down Expand Up @@ -323,7 +325,8 @@ def test_auto_combine_previously_failed(self):
datasets = [Dataset({'a': ('x', [2, 3]), 'x': [1, 2]}),
Dataset({'a': ('x', [1, 2]), 'x': [0, 1]})]
expected = Dataset({'a': (('t', 'x'),
[[np.nan, 2, 3], [1, 2, np.nan]])})
[[np.nan, 2, 3], [1, 2, np.nan]])},
{'x': [0, 1, 2]})
actual = auto_combine(datasets, concat_dim='t')
self.assertDatasetIdentical(expected, actual)

Expand Down
6 changes: 4 additions & 2 deletions xarray/test/test_dask.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,8 +202,10 @@ def assertLazyAndAllClose(self, expected, actual):
def setUp(self):
self.values = np.random.randn(4, 6)
self.data = da.from_array(self.values, chunks=(2, 2))
self.eager_array = DataArray(self.values, dims=('x', 'y'), name='foo')
self.lazy_array = DataArray(self.data, dims=('x', 'y'), name='foo')
self.eager_array = DataArray(self.values, coords={'x': range(4)},
dims=('x', 'y'), name='foo')
self.lazy_array = DataArray(self.data, coords={'x': range(4)},
dims=('x', 'y'), name='foo')

def test_rechunk(self):
chunked = self.eager_array.chunk({'x': 2}).chunk({'y': 2})
Expand Down
3 changes: 2 additions & 1 deletion xarray/test/test_dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -2102,7 +2102,8 @@ def test_to_dataset_retains_keys(self):
self.assertDatasetEqual(array, result)

def test__title_for_slice(self):
array = DataArray(np.ones((4, 3, 2)), dims=['a', 'b', 'c'])
array = DataArray(np.ones((4, 3, 2)), dims=['a', 'b', 'c'],
coords={'a': range(4), 'b': range(3), 'c': range(2)})
self.assertEqual('', array._title_for_slice())
self.assertEqual('c = 0', array.isel(c=0)._title_for_slice())
title = array.isel(b=1, c=0)._title_for_slice()
Expand Down

0 comments on commit 510997a

Please sign in to comment.