Skip to content

Commit

Permalink
Preserve base and loffset arguments in resample (pydata#7444)
Browse files Browse the repository at this point in the history
* [test-upstream] Preserve base and loffset arguments in resample

While pandas is getting set to remove these, we have not had a
chance to emit a deprecation warning yet for them in xarray.
This should hopefully give users some extra time to adapt.

* Emit warning when base is not None

Co-authored-by: Deepak Cherian <[email protected]>

* Modify warnings to refer loffset and base as parameters; add tests

* Add type validation for loffset arguments

* Add typing and support for pd.Timedelta as an loffset

* pd.Timedelta is a subclass of datetime.timedelta

* [test-upstream] Remove unneeded skipif

* Fix failing tests

* [test-upstream] Add return type to tests

* [test-upstream] Update documentation

* [test-upstream] Fix mypy errors in tests

* Move _convert_base_to_offset to pdcompat and add a few more tests

* Use offset instead of base in documentation

---------

Co-authored-by: Deepak Cherian <[email protected]>
  • Loading branch information
spencerkclark and dcherian authored Mar 8, 2023
1 parent 821dc24 commit 6d771fc
Show file tree
Hide file tree
Showing 8 changed files with 239 additions and 64 deletions.
2 changes: 1 addition & 1 deletion doc/user-guide/weather-climate.rst
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,7 @@ For data indexed by a :py:class:`~xarray.CFTimeIndex` xarray currently supports:

.. ipython:: python
da.resample(time="81T", closed="right", label="right", base=3).mean()
da.resample(time="81T", closed="right", label="right", offset="3T").mean()
.. _Timestamp-valid range: https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#timestamp-limitations
.. _ISO 8601 standard: https://en.wikipedia.org/wiki/ISO_8601
Expand Down
7 changes: 7 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,13 @@ Breaking changes

Deprecations
~~~~~~~~~~~~
- Following pandas, the ``base`` and ``loffset`` parameters of
:py:meth:`xr.DataArray.resample` and :py:meth:`xr.Dataset.resample` have been
deprecated and will be removed in a future version of xarray. Using the
``origin`` or ``offset`` parameters is recommended as a replacement for using
the ``base`` parameter and using time offset arithmetic is recommended as a
replacement for using the ``loffset`` parameter (:pull:`8459`). By `Spencer
Clark <https://github.com/spencerkclark>`_.


Bug fixes
Expand Down
70 changes: 48 additions & 22 deletions xarray/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,14 @@
from xarray.core import dtypes, duck_array_ops, formatting, formatting_html, ops
from xarray.core.indexing import BasicIndexer, ExplicitlyIndexed
from xarray.core.options import OPTIONS, _get_keep_attrs
from xarray.core.pdcompat import _convert_base_to_offset
from xarray.core.pycompat import is_duck_dask_array
from xarray.core.utils import Frozen, either_dict_or_kwargs, is_scalar
from xarray.core.utils import (
Frozen,
either_dict_or_kwargs,
emit_user_level_warning,
is_scalar,
)

try:
import cftime
Expand Down Expand Up @@ -845,6 +851,12 @@ def _resample(
For frequencies that evenly subdivide 1 day, the "origin" of the
aggregated intervals. For example, for "24H" frequency, base could
range from 0 through 23.
.. deprecated:: 2023.03.0
Following pandas, the ``base`` parameter is deprecated in favor
of the ``origin`` and ``offset`` parameters, and will be removed
in a future version of xarray.
origin : {'epoch', 'start', 'start_day', 'end', 'end_day'}, pd.Timestamp, datetime.datetime, np.datetime64, or cftime.datetime, default 'start_day'
The datetime on which to adjust the grouping. The timezone of origin
must match the timezone of the index.
Expand All @@ -860,6 +872,12 @@ def _resample(
loffset : timedelta or str, optional
Offset used to adjust the resampled time labels. Some pandas date
offset strings are supported.
.. deprecated:: 2023.03.0
Following pandas, the ``loffset`` parameter is deprecated in favor
of using time offset arithmetic, and will be removed in a future
version of xarray.
restore_coord_dims : bool, optional
If True, also restore the dimension order of multi-dimensional
coordinates.
Expand Down Expand Up @@ -930,8 +948,8 @@ def _resample(
"""
# TODO support non-string indexer after removing the old API.

from xarray.coding.cftimeindex import CFTimeIndex
from xarray.core.dataarray import DataArray
from xarray.core.groupby import TimeResampleGrouper
from xarray.core.resample import RESAMPLE_DIM

if keep_attrs is not None:
Expand Down Expand Up @@ -961,28 +979,36 @@ def _resample(
dim_name: Hashable = dim
dim_coord = self[dim]

if isinstance(self._indexes[dim_name].to_pandas_index(), CFTimeIndex):
from xarray.core.resample_cftime import CFTimeGrouper

grouper = CFTimeGrouper(
freq=freq,
closed=closed,
label=label,
base=base,
loffset=loffset,
origin=origin,
offset=offset,
if loffset is not None:
emit_user_level_warning(
"Following pandas, the `loffset` parameter to resample will be deprecated "
"in a future version of xarray. Switch to using time offset arithmetic.",
FutureWarning,
)
else:
grouper = pd.Grouper(
freq=freq,
closed=closed,
label=label,
base=base,
offset=offset,
origin=origin,
loffset=loffset,

if base is not None:
emit_user_level_warning(
"Following pandas, the `base` parameter to resample will be deprecated in "
"a future version of xarray. Switch to using `origin` or `offset` instead.",
FutureWarning,
)

if base is not None and offset is not None:
raise ValueError("base and offset cannot be present at the same time")

if base is not None:
index = self._indexes[dim_name].to_pandas_index()
offset = _convert_base_to_offset(base, freq, index)

grouper = TimeResampleGrouper(
freq=freq,
closed=closed,
label=label,
origin=origin,
offset=offset,
loffset=loffset,
)

group = DataArray(
dim_coord, coords=dim_coord.coords, dims=dim_coord.dims, name=RESAMPLE_DIM
)
Expand Down
78 changes: 65 additions & 13 deletions xarray/core/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@

from xarray.core.dataarray import DataArray
from xarray.core.dataset import Dataset
from xarray.core.types import DatetimeLike, SideOptions
from xarray.core.utils import Frozen

GroupKey = Any
Expand Down Expand Up @@ -245,7 +246,10 @@ def _unique_and_monotonic(group: T_Group) -> bool:
return index.is_unique and index.is_monotonic_increasing


def _apply_loffset(grouper, result):
def _apply_loffset(
loffset: str | pd.DateOffset | datetime.timedelta | pd.Timedelta,
result: pd.Series | pd.DataFrame,
):
"""
(copied from pandas)
if loffset is set, offset the result index
Expand All @@ -258,17 +262,25 @@ def _apply_loffset(grouper, result):
result : Series or DataFrame
the result of resample
"""
# pd.Timedelta is a subclass of datetime.timedelta so we do not need to
# include it in instance checks.
if not isinstance(loffset, (str, pd.DateOffset, datetime.timedelta)):
raise ValueError(
f"`loffset` must be a str, pd.DateOffset, datetime.timedelta, or pandas.Timedelta object. "
f"Got {loffset}."
)

if isinstance(loffset, str):
loffset = pd.tseries.frequencies.to_offset(loffset)

needs_offset = (
isinstance(grouper.loffset, (pd.DateOffset, datetime.timedelta))
isinstance(loffset, (pd.DateOffset, datetime.timedelta))
and isinstance(result.index, pd.DatetimeIndex)
and len(result.index) > 0
)

if needs_offset:
result.index = result.index + grouper.loffset

grouper.loffset = None
result.index = result.index + loffset


class GroupBy(Generic[T_Xarray]):
Expand Down Expand Up @@ -530,14 +542,7 @@ def __repr__(self) -> str:
)

def _get_index_and_items(self, index, grouper):
from xarray.core.resample_cftime import CFTimeGrouper

s = pd.Series(np.arange(index.size), index)
if isinstance(grouper, CFTimeGrouper):
first_items = grouper.first_items(index)
else:
first_items = s.groupby(grouper).first()
_apply_loffset(grouper, first_items)
first_items = grouper.first_items(index)
full_index = first_items.index
if first_items.isnull().any():
first_items = first_items.dropna()
Expand Down Expand Up @@ -1365,3 +1370,50 @@ class DatasetGroupBy( # type: ignore[misc]
ImplementsDatasetReduce,
):
__slots__ = ()


class TimeResampleGrouper:
def __init__(
self,
freq: str,
closed: SideOptions | None,
label: SideOptions | None,
origin: str | DatetimeLike,
offset: pd.Timedelta | datetime.timedelta | str | None,
loffset: datetime.timedelta | str | None,
):
self.freq = freq
self.closed = closed
self.label = label
self.origin = origin
self.offset = offset
self.loffset = loffset

def first_items(self, index):
from xarray import CFTimeIndex
from xarray.core.resample_cftime import CFTimeGrouper

if isinstance(index, CFTimeIndex):
grouper = CFTimeGrouper(
freq=self.freq,
closed=self.closed,
label=self.label,
origin=self.origin,
offset=self.offset,
loffset=self.loffset,
)
return grouper.first_items(index)
else:
s = pd.Series(np.arange(index.size), index)
grouper = pd.Grouper(
freq=self.freq,
closed=self.closed,
label=self.label,
origin=self.origin,
offset=self.offset,
)

first_items = s.groupby(grouper).first()
if self.loffset is not None:
_apply_loffset(self.loffset, first_items)
return first_items
23 changes: 23 additions & 0 deletions xarray/core/pdcompat.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,10 @@
from enum import Enum
from typing import Literal

import pandas as pd

from xarray.coding import cftime_offsets


def count_not_none(*args) -> int:
"""Compute the number of non-None arguments.
Expand Down Expand Up @@ -68,3 +72,22 @@ def __repr__(self) -> str:
_NoDefault.no_default
) # Sentinel indicating the default value following pandas
NoDefault = Literal[_NoDefault.no_default] # For typing following pandas


def _convert_base_to_offset(base, freq, index):
"""Required until we officially deprecate the base argument to resample. This
translates a provided `base` argument to an `offset` argument, following logic
from pandas.
"""
from xarray.coding.cftimeindex import CFTimeIndex

if isinstance(index, pd.DatetimeIndex):
freq = pd.tseries.frequencies.to_offset(freq)
if isinstance(freq, pd.offsets.Tick):
return pd.Timedelta(base * freq.nanos // freq.n)
elif isinstance(index, CFTimeIndex):
freq = cftime_offsets.to_offset(freq)
if isinstance(freq, cftime_offsets.Tick):
return base * freq.as_timedelta() // freq.n
else:
raise ValueError("Can only resample using a DatetimeIndex or CFTimeIndex.")
18 changes: 10 additions & 8 deletions xarray/core/resample_cftime.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,18 +71,13 @@ def __init__(
freq: str | BaseCFTimeOffset,
closed: SideOptions | None = None,
label: SideOptions | None = None,
base: int | None = None,
loffset: str | datetime.timedelta | BaseCFTimeOffset | None = None,
origin: str | CFTimeDatetime = "start_day",
offset: str | datetime.timedelta | None = None,
):
self.offset: datetime.timedelta | None
self.closed: SideOptions
self.label: SideOptions

if base is not None and offset is not None:
raise ValueError("base and offset cannot be provided at the same time")

self.freq = to_offset(freq)
self.loffset = loffset
self.origin = origin
Expand Down Expand Up @@ -122,9 +117,6 @@ def __init__(
else:
self.label = label

if base is not None and isinstance(self.freq, Tick):
offset = type(self.freq)(n=base % self.freq.n).as_timedelta()

if offset is not None:
try:
self.offset = _convert_offset_to_timedelta(offset)
Expand All @@ -150,6 +142,16 @@ def first_items(self, index: CFTimeIndex):
index, self.freq, self.closed, self.label, self.origin, self.offset
)
if self.loffset is not None:
if not isinstance(
self.loffset, (str, datetime.timedelta, BaseCFTimeOffset)
):
# BaseCFTimeOffset is not public API so we do not include it in
# the error message for now.
raise ValueError(
f"`loffset` must be a str or datetime.timedelta object. "
f"Got {self.loffset}."
)

if isinstance(self.loffset, datetime.timedelta):
labels = labels + self.loffset
else:
Expand Down
Loading

0 comments on commit 6d771fc

Please sign in to comment.