diff --git a/xarray/core/common.py b/xarray/core/common.py index 6dff9cc4024..3b0d549df52 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -18,7 +18,6 @@ from xarray.core.utils import ( Frozen, either_dict_or_kwargs, - emit_user_level_warning, is_scalar, ) from xarray.namedarray.core import _raise_if_any_duplicate_dimensions @@ -984,8 +983,7 @@ def _resample( # TODO support non-string indexer after removing the old API. from xarray.core.dataarray import DataArray - from xarray.core.groupby import ResolvedTimeResampleGrouper, TimeResampleGrouper - from xarray.core.pdcompat import _convert_base_to_offset + from xarray.core.groupby import ResolvedTimeResampler, TimeResampler from xarray.core.resample import RESAMPLE_DIM # note: the second argument (now 'skipna') use to be 'dim' @@ -1008,44 +1006,24 @@ def _resample( dim_name: Hashable = dim dim_coord = self[dim] - if loffset is not None: - emit_user_level_warning( - "Following pandas, the `loffset` parameter to resample is deprecated. " - "Switch to updating the resampled dataset time coordinate using " - "time offset arithmetic. For example:\n" - " >>> offset = pd.tseries.frequencies.to_offset(freq) / 2\n" - ' >>> resampled_ds["time"] = resampled_ds.get_index("time") + offset', - FutureWarning, - ) - - if base is not None: - emit_user_level_warning( - "Following pandas, the `base` parameter to resample will be deprecated in " - "a future version of xarray. 
Switch to using `origin` or `offset` instead.", - FutureWarning, - ) - - if base is not None and offset is not None: - raise ValueError("base and offset cannot be present at the same time") - - if base is not None: - index = self._indexes[dim_name].to_pandas_index() - offset = _convert_base_to_offset(base, freq, index) + group = DataArray( + dim_coord, + coords=dim_coord.coords, + dims=dim_coord.dims, + name=RESAMPLE_DIM, + ) - grouper = TimeResampleGrouper( + grouper = TimeResampler( freq=freq, closed=closed, label=label, origin=origin, offset=offset, loffset=loffset, + base=base, ) - group = DataArray( - dim_coord, coords=dim_coord.coords, dims=dim_coord.dims, name=RESAMPLE_DIM - ) - - rgrouper = ResolvedTimeResampleGrouper(grouper, group, self) + rgrouper = ResolvedTimeResampler(grouper, group, self) return resample_cls( self, diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 15bd8d1e35b..c774a086586 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -38,6 +38,7 @@ from xarray.core.utils import ( FrozenMappingWarningOnValuesAccess, either_dict_or_kwargs, + emit_user_level_warning, hashable, is_scalar, maybe_wrap_array, @@ -482,43 +483,66 @@ def _factorize(self, squeeze: bool) -> T_FactorizeOut: @dataclass -class ResolvedTimeResampleGrouper(ResolvedGrouper): - grouper: TimeResampleGrouper +class ResolvedTimeResampler(ResolvedGrouper): + grouper: TimeResampler index_grouper: CFTimeGrouper | pd.Grouper = field(init=False) + group_as_index: pd.Index = field(init=False) + + def __post_init__(self): + if self.loffset is not None: + emit_user_level_warning( + "Following pandas, the `loffset` parameter to resample will be deprecated " + "in a future version of xarray. 
Switch to using time offset arithmetic.", + FutureWarning, + ) - def __post_init__(self) -> None: - super().__post_init__() + if self.base is not None: + emit_user_level_warning( + "Following pandas, the `base` parameter to resample will be deprecated in " + "a future version of xarray. Switch to using `origin` or `offset` instead.", + FutureWarning, + ) + + if self.base is not None and self.offset is not None: + raise ValueError("base and offset cannot be present at the same time") + def _init_properties(self, group): from xarray import CFTimeIndex + from xarray.core.pdcompat import _convert_base_to_offset - group_as_index = safe_cast_to_index(self.group) - self._group_as_index = group_as_index + group_as_index = safe_cast_to_index(group) + + if self.base is not None: + # grouper constructor verifies that grouper.offset is None at this point + offset = _convert_base_to_offset(self.base, self.freq, group_as_index) + else: + offset = self.offset if not group_as_index.is_monotonic_increasing: # TODO: sort instead of raising an error raise ValueError("index must be monotonic for resampling") - grouper = self.grouper if isinstance(group_as_index, CFTimeIndex): from xarray.core.resample_cftime import CFTimeGrouper index_grouper = CFTimeGrouper( - freq=grouper.freq, - closed=grouper.closed, - label=grouper.label, - origin=grouper.origin, - offset=grouper.offset, - loffset=grouper.loffset, + freq=self.freq, + closed=self.closed, + label=self.label, + origin=self.origin, + offset=offset, + loffset=self.loffset, ) else: index_grouper = pd.Grouper( - freq=grouper.freq, - closed=grouper.closed, - label=grouper.label, - origin=grouper.origin, - offset=grouper.offset, + freq=self.freq, + closed=self.closed, + label=self.label, + origin=self.origin, + offset=offset, ) self.index_grouper = index_grouper + self.group_as_index = group_as_index def _get_index_and_items(self) -> tuple[pd.Index, pd.Series, np.ndarray]: first_items, codes = self.first_items() @@ -543,11 +567,12 @@ def 
first_items(self) -> tuple[pd.Series, np.ndarray]: # So for _flox_reduce we avoid one reindex and copy by avoiding # _maybe_restore_empty_groups codes = np.repeat(np.arange(len(first_items)), counts) - if self.grouper.loffset is not None: - _apply_loffset(self.grouper.loffset, first_items) + if self.loffset is not None: + _apply_loffset(self.loffset, first_items) return first_items, codes - def _factorize(self, squeeze: bool) -> T_FactorizeOut: + def _factorize(self, group) -> T_FactorizeOut: + self._init_properties(group) full_index, first_items, codes_ = self._get_index_and_items() sbins = first_items.values.astype(np.int64) group_indices: T_GroupIndices = [ @@ -555,10 +580,8 @@ def _factorize(self, squeeze: bool) -> T_FactorizeOut: ] group_indices += [slice(sbins[-1], None)] - unique_coord = IndexVariable( - self.group.name, first_items.index, self.group.attrs - ) - codes = self.group.copy(data=codes_) + unique_coord = IndexVariable(group.name, first_items.index, group.attrs) + codes = group.copy(data=codes_) return codes, group_indices, unique_coord, full_index @@ -583,13 +606,32 @@ def __post_init__(self) -> None: @dataclass -class TimeResampleGrouper(Grouper): +class TimeResampler(Grouper): freq: str - closed: SideOptions | None - label: SideOptions | None - origin: str | DatetimeLike | None - offset: pd.Timedelta | datetime.timedelta | str | None - loffset: datetime.timedelta | str | None + closed: SideOptions | None = field(default=None) + label: SideOptions | None = field(default=None) + origin: str | DatetimeLike = field(default="start_day") + offset: pd.Timedelta | datetime.timedelta | str | None = field(default=None) + loffset: datetime.timedelta | str | None = field(default=None) + base: int | None = field(default=None) + + def __post_init__(self) -> None: + if self.loffset is not None: + emit_user_level_warning( + "Following pandas, the `loffset` parameter to resample will be deprecated " + "in a future version of xarray. 
Switch to using time offset arithmetic.", + FutureWarning, + ) + + if self.base is not None: + emit_user_level_warning( + "Following pandas, the `base` parameter to resample will be deprecated in " + "a future version of xarray. Switch to using `origin` or `offset` instead.", + FutureWarning, + ) + + if self.base is not None and self.offset is not None: + raise ValueError("base and offset cannot be present at the same time") def _validate_groupby_squeeze(squeeze: bool) -> None: @@ -936,7 +978,7 @@ def _maybe_restore_empty_groups(self, combined): """ (grouper,) = self.groupers if ( - isinstance(grouper, (ResolvedBinGrouper, ResolvedTimeResampleGrouper)) + isinstance(grouper, (ResolvedBinGrouper, ResolvedTimeResampler)) and grouper.name in combined.dims ): indexers = {grouper.name: grouper.full_index}