Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Global option to always keep/discard attrs on operations #2482

Merged
merged 15 commits into from
Oct 30, 2018
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion doc/faq.rst
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,8 @@ conventions`_. (An exception is serialization to and from netCDF files.)

An implication of this choice is that we do not propagate ``attrs`` through
most operations unless explicitly flagged (some methods have a ``keep_attrs``
option). Similarly, xarray does not check for conflicts between ``attrs`` when
option, and there is a global flag for setting this to always be ``True`` or
``False``). Similarly, xarray does not check for conflicts between ``attrs`` when
combining arrays and datasets, unless explicitly requested with the option
``compat='identical'``. The guiding principle is that metadata should not be
allowed to get in the way.
Expand Down
5 changes: 5 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,11 @@ Enhancements
:py:meth:`~xarray.DataArray.interp`, and
:py:meth:`~xarray.Dataset.interp`.
By `Spencer Clark <https://github.com/spencerkclark>`_
- There is now a global option to either always keep or always discard
``Dataset`` and ``DataArray`` ``attrs`` upon operations. The option is set
with ``xarray.set_options(keep_attrs=True)`` (or ``keep_attrs=False``); by
default each method keeps its previous behaviour.
By `Tom Nicholas <https://github.com/TomNicholas>`_.

Bug fixes
~~~~~~~~~
Expand Down
28 changes: 20 additions & 8 deletions xarray/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from .arithmetic import SupportsArithmetic
from .pycompat import OrderedDict, basestring, dask_array_type, suppress
from .utils import Frozen, ReprObject, SortedKeysDict, either_dict_or_kwargs
from .options import _get_keep_attrs

# Used as a sentinel value to indicate all dimensions
ALL_DIMS = ReprObject('<all-dims>')
Expand All @@ -21,12 +22,16 @@ class ImplementsArrayReduce(object):
def _reduce_method(cls, func, include_skipna, numeric_only):
if include_skipna:
def wrapped_func(self, dim=None, axis=None, skipna=None,
keep_attrs=False, **kwargs):
keep_attrs=None, **kwargs):
if keep_attrs is None:
keep_attrs = _get_keep_attrs(default=False)
TomNicholas marked this conversation as resolved.
Show resolved Hide resolved
return self.reduce(func, dim, axis, keep_attrs=keep_attrs,
skipna=skipna, allow_lazy=True, **kwargs)
else:
def wrapped_func(self, dim=None, axis=None, keep_attrs=False,
**kwargs):
def wrapped_func(self, dim=None, axis=None,
keep_attrs=None, **kwargs):
if keep_attrs is None:
keep_attrs = _get_keep_attrs(default=False)
return self.reduce(func, dim, axis, keep_attrs=keep_attrs,
allow_lazy=True, **kwargs)
return wrapped_func
Expand All @@ -51,14 +56,18 @@ class ImplementsDatasetReduce(object):
@classmethod
def _reduce_method(cls, func, include_skipna, numeric_only):
if include_skipna:
def wrapped_func(self, dim=None, keep_attrs=False, skipna=None,
def wrapped_func(self, dim=None, keep_attrs=None, skipna=None,
**kwargs):
return self.reduce(func, dim, keep_attrs, skipna=skipna,
if keep_attrs is None:
keep_attrs = _get_keep_attrs(default=False)
return self.reduce(func, dim, keep_attrs=keep_attrs, skipna=skipna,
numeric_only=numeric_only, allow_lazy=True,
**kwargs)
else:
def wrapped_func(self, dim=None, keep_attrs=False, **kwargs):
return self.reduce(func, dim, keep_attrs,
def wrapped_func(self, dim=None, keep_attrs=None, **kwargs):
if keep_attrs is None:
keep_attrs = _get_keep_attrs(default=False)
return self.reduce(func, dim, keep_attrs=keep_attrs,
numeric_only=numeric_only, allow_lazy=True,
**kwargs)
return wrapped_func
Expand Down Expand Up @@ -590,7 +599,7 @@ def rolling(self, dim=None, min_periods=None, center=False, **dim_kwargs):
center=center)

def resample(self, freq=None, dim=None, how=None, skipna=None,
closed=None, label=None, base=0, keep_attrs=False, **indexer):
closed=None, label=None, base=0, keep_attrs=None, **indexer):
"""Returns a Resample object for performing resampling operations.

Handles both downsampling and upsampling. If any intervals contain no
Expand Down Expand Up @@ -658,6 +667,9 @@ def resample(self, freq=None, dim=None, how=None, skipna=None,
from .dataarray import DataArray
from .resample import RESAMPLE_DIM

if keep_attrs is None:
keep_attrs = _get_keep_attrs(default=False)

if dim is not None:
if how is None:
how = 'mean'
Expand Down
18 changes: 14 additions & 4 deletions xarray/core/dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
assert_coordinate_consistent, remap_label_indexers)
from .dataset import Dataset, merge_indexes, split_indexes
from .formatting import format_item
from .options import OPTIONS
from .options import OPTIONS, _get_keep_attrs
from .pycompat import OrderedDict, basestring, iteritems, range, zip
from .utils import (
decode_numpy_dict_values, either_dict_or_kwargs, ensure_us_time_resolution)
Expand Down Expand Up @@ -1559,7 +1559,7 @@ def combine_first(self, other):
"""
return ops.fillna(self, other, join="outer")

def reduce(self, func, dim=None, axis=None, keep_attrs=False, **kwargs):
def reduce(self, func, dim=None, axis=None, keep_attrs=None, **kwargs):
"""Reduce this array by applying `func` along some dimension(s).

Parameters
Expand Down Expand Up @@ -1588,6 +1588,10 @@ def reduce(self, func, dim=None, axis=None, keep_attrs=False, **kwargs):
DataArray with this object's array replaced with an array with
summarized data and the indicated dimension(s) removed.
"""

if keep_attrs is None:
TomNicholas marked this conversation as resolved.
Show resolved Hide resolved
keep_attrs = _get_keep_attrs(default=False)

var = self.variable.reduce(func, dim, axis, keep_attrs, **kwargs)
return self._replace_maybe_drop_dims(var)

Expand Down Expand Up @@ -2270,7 +2274,7 @@ def sortby(self, variables, ascending=True):
ds = self._to_temp_dataset().sortby(variables, ascending=ascending)
return self._from_temp_dataset(ds)

def quantile(self, q, dim=None, interpolation='linear', keep_attrs=False):
def quantile(self, q, dim=None, interpolation='linear', keep_attrs=None):
"""Compute the qth quantile of the data along the specified dimension.

Returns the qth quantiles(s) of the array elements.
Expand Down Expand Up @@ -2312,11 +2316,14 @@ def quantile(self, q, dim=None, interpolation='linear', keep_attrs=False):
numpy.nanpercentile, pandas.Series.quantile, Dataset.quantile
"""

if keep_attrs is None:
keep_attrs = _get_keep_attrs(default=False)

TomNicholas marked this conversation as resolved.
Show resolved Hide resolved
ds = self._to_temp_dataset().quantile(
q, dim=dim, keep_attrs=keep_attrs, interpolation=interpolation)
return self._from_temp_dataset(ds)

def rank(self, dim, pct=False, keep_attrs=False):
def rank(self, dim, pct=False, keep_attrs=None):
"""Ranks the data.

Equal values are assigned a rank that is the average of the ranks that
Expand Down Expand Up @@ -2352,6 +2359,9 @@ def rank(self, dim, pct=False, keep_attrs=False):
array([ 1., 2., 3.])
Dimensions without coordinates: x
"""
if keep_attrs is None:
keep_attrs = _get_keep_attrs(default=False)

ds = self._to_temp_dataset().rank(dim, pct=pct, keep_attrs=keep_attrs)
return self._from_temp_dataset(ds)

Expand Down
27 changes: 19 additions & 8 deletions xarray/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
from .merge import (
dataset_merge_method, dataset_update_method, merge_data_and_coords,
merge_variables)
from .options import OPTIONS
from .options import OPTIONS, _get_keep_attrs
from .pycompat import (
OrderedDict, basestring, dask_array_type, integer_types, iteritems, range)
from .utils import (
Expand Down Expand Up @@ -2870,7 +2870,7 @@ def combine_first(self, other):
out = ops.fillna(self, other, join="outer", dataset_join="outer")
return out

def reduce(self, func, dim=None, keep_attrs=False, numeric_only=False,
def reduce(self, func, dim=None, keep_attrs=None, numeric_only=False,
allow_lazy=False, **kwargs):
"""Reduce this dataset by applying `func` along some dimension(s).

Expand Down Expand Up @@ -2912,6 +2912,9 @@ def reduce(self, func, dim=None, keep_attrs=False, numeric_only=False,
raise ValueError('Dataset does not contain the dimensions: %s'
% missing_dimensions)

if keep_attrs is None:
keep_attrs = _get_keep_attrs(default=False)

variables = OrderedDict()
for name, var in iteritems(self._variables):
reduce_dims = [dim for dim in var.dims if dim in dims]
Expand Down Expand Up @@ -2940,7 +2943,7 @@ def reduce(self, func, dim=None, keep_attrs=False, numeric_only=False,
attrs = self.attrs if keep_attrs else None
return self._replace_vars_and_dims(variables, coord_names, attrs=attrs)

def apply(self, func, keep_attrs=False, args=(), **kwargs):
def apply(self, func, keep_attrs=None, args=(), **kwargs):
"""Apply a function over the data variables in this dataset.

Parameters
Expand All @@ -2950,7 +2953,7 @@ def apply(self, func, keep_attrs=False, args=(), **kwargs):
transform each DataArray `x` in this dataset into another
DataArray.
keep_attrs : bool, optional
If True, the dataset's attributes (`attrs`) will be copied from
If True, the dataset's attributes (`attrs`) will be copied from
TomNicholas marked this conversation as resolved.
Show resolved Hide resolved
the original object to the new one. If False, the new object will
be returned without attributes.
args : tuple, optional
Expand Down Expand Up @@ -2985,6 +2988,8 @@ def apply(self, func, keep_attrs=False, args=(), **kwargs):
variables = OrderedDict(
(k, maybe_wrap_array(v, func(v, *args, **kwargs)))
for k, v in iteritems(self.data_vars))
if keep_attrs is None:
keep_attrs = _get_keep_attrs(default=False)
attrs = self.attrs if keep_attrs else None
return type(self)(variables, attrs=attrs)

Expand Down Expand Up @@ -3649,7 +3654,7 @@ def sortby(self, variables, ascending=True):
return aligned_self.isel(**indices)

def quantile(self, q, dim=None, interpolation='linear',
numeric_only=False, keep_attrs=False):
numeric_only=False, keep_attrs=None):
"""Compute the qth quantile of the data along the specified dimension.

Returns the qth quantiles(s) of the array elements for each variable
Expand Down Expand Up @@ -3727,6 +3732,8 @@ def quantile(self, q, dim=None, interpolation='linear',

# construct the new dataset
coord_names = set(k for k in self.coords if k in variables)
if keep_attrs is None:
keep_attrs = _get_keep_attrs(default=False)
attrs = self.attrs if keep_attrs else None
new = self._replace_vars_and_dims(variables, coord_names, attrs=attrs)
if 'quantile' in new.dims:
Expand All @@ -3735,7 +3742,7 @@ def quantile(self, q, dim=None, interpolation='linear',
new.coords['quantile'] = q
return new

def rank(self, dim, pct=False, keep_attrs=False):
def rank(self, dim, pct=False, keep_attrs=None):
"""Ranks the data.

Equal values are assigned a rank that is the average of the ranks that
Expand Down Expand Up @@ -3775,6 +3782,8 @@ def rank(self, dim, pct=False, keep_attrs=False):
variables[name] = var

coord_names = set(self.coords)
if keep_attrs is None:
keep_attrs = _get_keep_attrs(default=False)
attrs = self.attrs if keep_attrs else None
return self._replace_vars_and_dims(variables, coord_names, attrs=attrs)

Expand Down Expand Up @@ -3838,11 +3847,13 @@ def differentiate(self, coord, edge_order=1, datetime_unit=None):

@property
def real(self):
return self._unary_op(lambda x: x.real, keep_attrs=True)(self)
return self._unary_op(lambda x: x.real,
keep_attrs=True)(self)

@property
def imag(self):
return self._unary_op(lambda x: x.imag, keep_attrs=True)(self)
return self._unary_op(lambda x: x.imag,
keep_attrs=True)(self)

def filter_by_attrs(self, **kwargs):
"""Returns a ``Dataset`` with variables that match specific conditions.
Expand Down
39 changes: 26 additions & 13 deletions xarray/core/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from .pycompat import integer_types, range, zip
from .utils import hashable, maybe_wrap_array, peek_at, safe_cast_to_index
from .variable import IndexVariable, Variable, as_variable
from .options import _get_keep_attrs


def unique_value_groups(ar, sort=True):
Expand Down Expand Up @@ -404,15 +405,17 @@ def _first_or_last(self, op, skipna, keep_attrs):
# NB. this is currently only used for reductions along an existing
# dimension
return self._obj
if keep_attrs is None:
keep_attrs = _get_keep_attrs(default=True)
return self.reduce(op, self._group_dim, skipna=skipna,
keep_attrs=keep_attrs, allow_lazy=True)

def first(self, skipna=None, keep_attrs=True):
def first(self, skipna=None, keep_attrs=None):
"""Return the first element of each group along the group dimension
"""
return self._first_or_last(duck_array_ops.first, skipna, keep_attrs)

def last(self, skipna=None, keep_attrs=True):
def last(self, skipna=None, keep_attrs=None):
"""Return the last element of each group along the group dimension
"""
return self._first_or_last(duck_array_ops.last, skipna, keep_attrs)
Expand Down Expand Up @@ -538,8 +541,8 @@ def _combine(self, applied, shortcut=False):
combined = self._maybe_unstack(combined)
return combined

def reduce(self, func, dim=None, axis=None, keep_attrs=False,
shortcut=True, **kwargs):
def reduce(self, func, dim=None, axis=None,
keep_attrs=None, shortcut=True, **kwargs):
"""Reduce the items in this group by applying `func` along some
dimension(s).

Expand Down Expand Up @@ -579,6 +582,9 @@ def reduce(self, func, dim=None, axis=None, keep_attrs=False,
"warning, pass dim=xarray.ALL_DIMS explicitly.",
FutureWarning, stacklevel=2)

if keep_attrs is None:
keep_attrs = _get_keep_attrs(default=False)

def reduce_array(ar):
return ar.reduce(func, dim, axis, keep_attrs=keep_attrs, **kwargs)
return self.apply(reduce_array, shortcut=shortcut)
Expand All @@ -589,12 +595,16 @@ def reduce_array(ar):
def _reduce_method(cls, func, include_skipna, numeric_only):
if include_skipna:
def wrapped_func(self, dim=DEFAULT_DIMS, axis=None, skipna=None,
keep_attrs=False, **kwargs):
keep_attrs=None, **kwargs):
if keep_attrs is None:
TomNicholas marked this conversation as resolved.
Show resolved Hide resolved
keep_attrs = _get_keep_attrs(default=False)
return self.reduce(func, dim, axis, keep_attrs=keep_attrs,
skipna=skipna, allow_lazy=True, **kwargs)
else:
def wrapped_func(self, dim=DEFAULT_DIMS, axis=None,
keep_attrs=False, **kwargs):
keep_attrs=None, **kwargs):
if keep_attrs is None:
keep_attrs = _get_keep_attrs(default=False)
return self.reduce(func, dim, axis, keep_attrs=keep_attrs,
allow_lazy=True, **kwargs)
return wrapped_func
Expand Down Expand Up @@ -650,7 +660,7 @@ def _combine(self, applied):
combined = self._maybe_unstack(combined)
return combined

def reduce(self, func, dim=None, keep_attrs=False, **kwargs):
def reduce(self, func, dim=None, keep_attrs=None, **kwargs):
"""Reduce the items in this group by applying `func` along some
dimension(s).

Expand Down Expand Up @@ -691,6 +701,9 @@ def reduce(self, func, dim=None, keep_attrs=False, **kwargs):
elif dim is None:
dim = self._group_dim

if keep_attrs is None:
keep_attrs = _get_keep_attrs(default=False)

def reduce_dataset(ds):
return ds.reduce(func, dim, keep_attrs, **kwargs)
return self.apply(reduce_dataset)
Expand All @@ -700,15 +713,15 @@ def reduce_dataset(ds):
@classmethod
def _reduce_method(cls, func, include_skipna, numeric_only):
if include_skipna:
def wrapped_func(self, dim=DEFAULT_DIMS, keep_attrs=False,
def wrapped_func(self, dim=DEFAULT_DIMS,
skipna=None, **kwargs):
return self.reduce(func, dim, keep_attrs, skipna=skipna,
numeric_only=numeric_only, allow_lazy=True,
**kwargs)
return self.reduce(func, dim,
skipna=skipna, numeric_only=numeric_only,
allow_lazy=True, **kwargs)
else:
def wrapped_func(self, dim=DEFAULT_DIMS, keep_attrs=False,
def wrapped_func(self, dim=DEFAULT_DIMS,
**kwargs):
return self.reduce(func, dim, keep_attrs,
return self.reduce(func, dim,
numeric_only=numeric_only, allow_lazy=True,
**kwargs)
return wrapped_func
Expand Down
Loading