Skip to content

Commit

Permalink
fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
shoyer committed Sep 29, 2016
1 parent 7901c0c commit 510997a
Show file tree
Hide file tree
Showing 10 changed files with 60 additions and 80 deletions.
25 changes: 0 additions & 25 deletions xarray/backends/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,25 +30,6 @@ def _decode_variable_name(name):
return name


def is_trivial_index(var):
    """Check whether a variable is a 'trivial' index.

    A trivial index is one equivalent to ``np.arange(var.size)``: it
    carries no attributes or encoding, is one-dimensional with an
    integer dtype, and its values match ``np.arange`` exactly.
    """
    # Any attributes or encoding make the index non-trivial.
    if var.attrs or var.encoding:
        return False
    # Must be a 1d integer array to possibly match np.arange.
    if var.ndim > 1 or var.dtype.kind != 'i':
        return False
    expected = np.arange(var.size, dtype=var.dtype)
    return np.all(var.values == expected)


def robust_getitem(array, key, catch=Exception, max_retries=6,
initial_delay=500):
"""
Expand Down Expand Up @@ -200,12 +181,6 @@ def store_dataset(self, dataset):

def store(self, variables, attributes, check_encoding_set=frozenset()):
self.set_attributes(attributes)
neccesary_dims = [v.dims for v in variables.values()]
neccesary_dims = set(itertools.chain(*neccesary_dims))
# set all non-indexes and any index which is not trivial.
variables = OrderedDict((k, v) for k, v in iteritems(variables)
if not (k in neccesary_dims and
is_trivial_index(v)))
self.set_variables(variables, check_encoding_set)

def set_attributes(self, attributes):
Expand Down
2 changes: 1 addition & 1 deletion xarray/conventions.py
Original file line number Diff line number Diff line change
Expand Up @@ -910,7 +910,7 @@ def decode_cf(obj, concat_characters=True, mask_and_scale=True,
identify coordinates.
drop_variables: string or iterable, optional
A variable or list of variables to exclude from being parsed from the
dataset.This may be useful to drop variables with problems or
dataset. This may be useful to drop variables with problems or
inconsistent values.
Returns
Expand Down
25 changes: 13 additions & 12 deletions xarray/core/alignment.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from . import ops, utils
from .common import _maybe_promote
from .indexing import get_indexer
from .pycompat import iteritems, OrderedDict
from .pycompat import iteritems, OrderedDict, suppress
from .utils import is_full_slice, is_dict_like
from .variable import Variable, IndexVariable

Expand Down Expand Up @@ -112,16 +112,16 @@ def align(*objects, **kwargs):
# pandas). This is useful, e.g., for overwriting such duplicate indexes.
joiner = _get_joiner(join)
joined_indexes = {}
for name, matching_indexes in iteritems(all_indexes):
if name in indexes:
index = utils.safe_cast_to_index(indexes[name])
for dim, matching_indexes in iteritems(all_indexes):
if dim in indexes:
index = utils.safe_cast_to_index(indexes[dim])
if any(not index.equals(other) for other in matching_indexes):
joined_indexes[name] = index
joined_indexes[dim] = index
else:
if any(not matching_indexes[0].equals(other)
for other in matching_indexes[1:]):
index = joiner(matching_indexes)
joined_indexes[name] = index
joined_indexes[dim] = index
else:
index = matching_indexes[0]

Expand Down Expand Up @@ -384,18 +384,17 @@ def broadcast(*args, **kwargs):
for arg in args:
for dim in arg.dims:
if dim not in common_coords and dim not in exclude:
common_coords[dim] = arg.coords[dim].variable
dims_map[dim] = common_coords[dim].size
dims_map[dim] = get_size(arg, dim)
if dim in arg.coords:
common_coords[dim] = arg.coords[dim].variable

def _expand_dims(var):
# Add excluded dims to a copy of dims_map
var_dims_map = dims_map.copy()
for dim in exclude:
try:
with suppress(ValueError):
# ignore dim not in var.dims
var_dims_map[dim] = var.shape[var.dims.index(dim)]
except ValueError:
# dim not in var.dims
pass

return var.expand_dims(var_dims_map)

Expand All @@ -414,6 +413,8 @@ def _broadcast_dataset(ds):
coords.update(common_coords)
return Dataset(data_vars, coords, ds.attrs)

print(common_coords, dims_map)

result = []
for arg in args:
if isinstance(arg, DataArray):
Expand Down
26 changes: 11 additions & 15 deletions xarray/core/coordinates.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,25 +219,21 @@ def __delitem__(self, key):
del self._data._coords[key]


class LevelCoordinates(AbstractCoordinates):
"""Dictionary like container for MultiIndex level coordinates.
class LevelCoordinatesSource(object):
"""Iterator for MultiIndex level coordinates.
Used for attribute style lookup. Not returned directly by any
public methods.
Used for attribute style lookup with AttrAccessMixin. Not returned directly
by any public methods.
"""
def __init__(self, dataarray):
self._data = dataarray
def __init__(self, data_object):
self._data = data_object

@property
def _names(self):
return set(self._data._level_coords)
def __getitem__(self, key):
# not necessary -- everything here can already be found on coords.
raise KeyError

@property
def variables(self):
level_coords = OrderedDict(
(k, self._data[v].variable.get_level_variable(k))
for k, v in self._data._level_coords.items())
return Frozen(level_coords)
def __iter__(self):
return iter(self._data._level_coords)


class Indexes(Mapping, formatting.ReprMixin):
Expand Down
7 changes: 4 additions & 3 deletions xarray/core/dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from . import utils
from .alignment import align
from .common import AbstractArray, BaseDataObject, squeeze
from .coordinates import (DataArrayCoordinates, LevelCoordinates,
from .coordinates import (DataArrayCoordinates, LevelCoordinatesSource,
Indexes)
from .dataset import Dataset
from .pycompat import iteritems, basestring, OrderedDict, zip
Expand Down Expand Up @@ -471,7 +471,7 @@ def __delitem__(self, key):
@property
def _attr_sources(self):
"""List of places to look-up items for attribute-style access"""
return [self.coords, LevelCoordinates(self), self.attrs]
return [self.coords, LevelCoordinatesSource(self), self.attrs]

def __contains__(self, key):
return key in self._coords
Expand Down Expand Up @@ -1639,7 +1639,8 @@ def dot(self, other):
axes = (self.get_axis_num(dims), other.get_axis_num(dims))
new_data = ops.tensordot(self.data, other.data, axes=axes)

new_coords = self.coords.merge(other.coords).drop(dims)
new_coords = self.coords.merge(other.coords)
new_coords = new_coords.drop([d for d in dims if d in new_coords])
new_dims = ([d for d in self.dims if d not in dims] +
[d for d in other.dims if d not in dims])

Expand Down
4 changes: 2 additions & 2 deletions xarray/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from . import formatting
from .. import conventions
from .alignment import align
from .coordinates import DatasetCoordinates, LevelCoordinates, Indexes
from .coordinates import DatasetCoordinates, LevelCoordinatesSource, Indexes
from .common import ImplementsDatasetReduce, BaseDataObject
from .merge import (dataset_update_method, dataset_merge_method,
merge_data_and_coords)
Expand Down Expand Up @@ -499,7 +499,7 @@ def __deepcopy__(self, memo=None):
@property
def _attr_sources(self):
"""List of places to look-up items for attribute-style access"""
return [self, LevelCoordinates(self), self.attrs]
return [self, LevelCoordinatesSource(self), self.attrs]

def __contains__(self, key):
"""The 'in' operator will return true or false depending on whether
Expand Down
3 changes: 2 additions & 1 deletion xarray/test/test_backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -1088,7 +1088,8 @@ class MiscObject:
class TestValidateAttrs(TestCase):
def test_validating_attrs(self):
def new_dataset():
return Dataset({'data': ('y', np.arange(10.0))})
return Dataset({'data': ('y', np.arange(10.0))},
{'y': np.arange(10)})

def new_dataset_and_dataset_attrs():
ds = new_dataset()
Expand Down
39 changes: 21 additions & 18 deletions xarray/test/test_combine.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,19 +31,21 @@ def rectify_dim_order(dataset):
for dim in ['dim1', 'dim2']:
datasets = [g for _, g in data.groupby(dim, squeeze=False)]
self.assertDatasetIdentical(data, concat(datasets, dim))
self.assertDatasetIdentical(
data, concat(datasets, data[dim]))
self.assertDatasetIdentical(
data, concat(datasets, data[dim], coords='minimal'))

datasets = [g for _, g in data.groupby(dim, squeeze=True)]
concat_over = [k for k, v in iteritems(data.coords)
if dim in v.dims and k != dim]
actual = concat(datasets, data[dim], coords=concat_over)
self.assertDatasetIdentical(data, rectify_dim_order(actual))
dim = 'dim2'
self.assertDatasetIdentical(
data, concat(datasets, data[dim]))
self.assertDatasetIdentical(
data, concat(datasets, data[dim], coords='minimal'))

actual = concat(datasets, data[dim], coords='different')
self.assertDatasetIdentical(data, rectify_dim_order(actual))
datasets = [g for _, g in data.groupby(dim, squeeze=True)]
concat_over = [k for k, v in iteritems(data.coords)
if dim in v.dims and k != dim]
actual = concat(datasets, data[dim], coords=concat_over)
self.assertDatasetIdentical(data, rectify_dim_order(actual))

actual = concat(datasets, data[dim], coords='different')
self.assertDatasetIdentical(data, rectify_dim_order(actual))

# make sure the coords argument behaves as expected
data.coords['extra'] = ('dim4', np.arange(3))
Expand Down Expand Up @@ -111,7 +113,8 @@ def test_concat_autoalign(self):
ds2 = Dataset({'foo': DataArray([1, 2], coords=[('x', [1, 3])])})
actual = concat([ds1, ds2], 'y')
expected = Dataset({'foo': DataArray([[1, 2, np.nan], [1, np.nan, 2]],
dims=['y', 'x'], coords={'y': [0, 1], 'x': [1, 2, 3]})})
dims=['y', 'x'],
coords={'x': [1, 2, 3]})})
self.assertDatasetIdentical(expected, actual)

def test_concat_errors(self):
Expand Down Expand Up @@ -184,7 +187,7 @@ def test_concat_promote_shape(self):
objs = [Dataset({'x': [0]}, {'y': -1}),
Dataset({'x': [1, 2]}, {'y': -2})]
actual = concat(objs, 'x')
expected = Dataset({}, {'y': ('x', [-1, -2, -2])})
expected = Dataset({'x': [0, 1, 2]}, {'y': ('x', [-1, -2, -2])})
self.assertDatasetIdentical(actual, expected)

# broadcast 1d x 1d -> 2d
Expand All @@ -196,8 +199,8 @@ def test_concat_promote_shape(self):

def test_concat_do_not_promote(self):
# GH438
objs = [Dataset({'y': ('t', [1])}, {'x': 1}),
Dataset({'y': ('t', [2])}, {'x': 1})]
objs = [Dataset({'y': ('t', [1])}, {'x': 1, 't': 0}),
Dataset({'y': ('t', [2])}, {'x': 1, 't': 0})]
expected = Dataset({'y': ('t', [1, 2])}, {'x': 1, 't': [0, 0]})
actual = concat(objs, 't')
self.assertDatasetIdentical(expected, actual)
Expand Down Expand Up @@ -291,8 +294,7 @@ def test_auto_combine(self):
objs = [Dataset(OrderedDict([('x', ('a', [0])), ('y', ('a', [0]))])),
Dataset(OrderedDict([('y', ('a', [1])), ('x', ('a', [1]))]))]
actual = auto_combine(objs)
expected = Dataset(
{'x': ('a', [0, 1]), 'y': ('a', [0, 1]), 'a': [0, 0]})
expected = Dataset({'x': ('a', [0, 1]), 'y': ('a', [0, 1])})
self.assertDatasetIdentical(expected, actual)

objs = [Dataset({'x': [0], 'y': [0]}), Dataset({'y': [1], 'x': [1]})]
Expand Down Expand Up @@ -323,7 +325,8 @@ def test_auto_combine_previously_failed(self):
datasets = [Dataset({'a': ('x', [2, 3]), 'x': [1, 2]}),
Dataset({'a': ('x', [1, 2]), 'x': [0, 1]})]
expected = Dataset({'a': (('t', 'x'),
[[np.nan, 2, 3], [1, 2, np.nan]])})
[[np.nan, 2, 3], [1, 2, np.nan]])},
{'x': [0, 1, 2]})
actual = auto_combine(datasets, concat_dim='t')
self.assertDatasetIdentical(expected, actual)

Expand Down
6 changes: 4 additions & 2 deletions xarray/test/test_dask.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,8 +202,10 @@ def assertLazyAndAllClose(self, expected, actual):
def setUp(self):
self.values = np.random.randn(4, 6)
self.data = da.from_array(self.values, chunks=(2, 2))
self.eager_array = DataArray(self.values, dims=('x', 'y'), name='foo')
self.lazy_array = DataArray(self.data, dims=('x', 'y'), name='foo')
self.eager_array = DataArray(self.values, coords={'x': range(4)},
dims=('x', 'y'), name='foo')
self.lazy_array = DataArray(self.data, coords={'x': range(4)},
dims=('x', 'y'), name='foo')

def test_rechunk(self):
chunked = self.eager_array.chunk({'x': 2}).chunk({'y': 2})
Expand Down
3 changes: 2 additions & 1 deletion xarray/test/test_dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -2102,7 +2102,8 @@ def test_to_dataset_retains_keys(self):
self.assertDatasetEqual(array, result)

def test__title_for_slice(self):
array = DataArray(np.ones((4, 3, 2)), dims=['a', 'b', 'c'])
array = DataArray(np.ones((4, 3, 2)), dims=['a', 'b', 'c'],
coords={'a': range(4), 'b': range(3), 'c': range(2)})
self.assertEqual('', array._title_for_slice())
self.assertEqual('c = 0', array.isel(c=0)._title_for_slice())
title = array.isel(b=1, c=0)._title_for_slice()
Expand Down

0 comments on commit 510997a

Please sign in to comment.