Skip to content

Commit

Permalink
Added support for Dataset.rolling. (pydata#1289)
Browse files Browse the repository at this point in the history
* Added support for Dataset.rolling

* Fixed an accidental bug in test_dataset.py

* Dimension order is restored after Dataset.rolling.

* Add some docstrings. Code clean up.

* Make rolling_cls and groupby_cls private.

* Removed another old-style call for the superclass method.

* Fixed some breaks made by the previous merge.

* Moved ImplementedArrayRolling into rolling

* Made DatasetRolling utilize DataArrayRolling of each DataArrays.

* Added 1 line to api.rst.

* Updating test_dataset to catch pydata#1278

* Test_dataset now tests bottleneck.move_var.

* Raises an exception if no data_vars depend on the rolling-dims.

* Fixed style issue and added docstring for inject_datasetrolling_methods

* Recover unintended change.

* An empty commit to trigger TravisCI rebuild.

* Recover test_assign_attrs, which was deleted accidentally.
Some improvements based on shoyer's comments.

* An empty commit to trigger Travis's rebuild
  • Loading branch information
fujiisoup authored and shoyer committed Mar 31, 2017
1 parent f2a5015 commit 09ef2c2
Show file tree
Hide file tree
Showing 9 changed files with 403 additions and 93 deletions.
1 change: 1 addition & 0 deletions doc/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@ Computation
Dataset.reduce
Dataset.groupby
Dataset.groupby_bins
Dataset.rolling
Dataset.resample
Dataset.diff
Dataset.quantile
Expand Down
2 changes: 2 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ v0.9.2 (unreleased)

Enhancements
~~~~~~~~~~~~
- ``rolling`` on Dataset is now supported (:issue:`859`).
By `Keisuke Fujii <https://github.com/fujiisoup>`_.
- When bottleneck version 1.1 or later is installed, use bottleneck for rolling
`var`, `argmin`, `argmax`, and `rank` computations. Also, `rolling.median`
now also accepts a `min_periods` argument (:issue:`1276`).
Expand Down
73 changes: 8 additions & 65 deletions xarray/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,63 +69,6 @@ def wrapped_func(self, dim=None, keep_attrs=False, **kwargs):
and 'axis' arguments can be supplied."""


class ImplementsRollingArrayReduce(object):
    """Mixin providing factory classmethods that build rolling-window
    reduction methods, both generic and bottleneck-accelerated."""

    @classmethod
    def _reduce_method(cls, func):
        """Build a method that reduces the rolling windows with ``func``.

        The returned function simply delegates to ``self.reduce``.
        """
        def wrapped_func(self, **kwargs):
            return self.reduce(func, **kwargs)
        return wrapped_func

    @classmethod
    def _bottleneck_reduce(cls, func):
        """Build a method that applies the bottleneck moving-window
        function ``func`` along the rolling dimension."""
        def wrapped_func(self, **kwargs):
            from .dataarray import DataArray

            if isinstance(self.obj.data, dask_array_type):
                raise NotImplementedError(
                    'Rolling window operation does not work with dask arrays')

            # bottleneck rejects min_count=0, but min_count=1 gives the
            # same result, so substitute it
            min_count = 1 if self.min_periods == 0 else self.min_periods

            values = func(self.obj.data, window=self.window,
                          min_count=min_count, axis=self._axis_num)
            result = DataArray(values, self.obj.coords)
            # re-align the window labels when centering was requested
            return self._center_result(result) if self.center else result
        return wrapped_func

    @classmethod
    def _bottleneck_reduce_without_min_count(cls, func):
        """Build a method for a bottleneck moving-window function that
        does not support a ``min_count`` argument (e.g. move_median)."""
        def wrapped_func(self, **kwargs):
            from .dataarray import DataArray

            if self.min_periods is not None:
                raise ValueError('Rolling.median does not accept min_periods')

            if isinstance(self.obj.data, dask_array_type):
                raise NotImplementedError(
                    'Rolling window operation does not work with dask arrays')

            values = func(self.obj.data, window=self.window,
                          axis=self._axis_num)
            result = DataArray(values, self.obj.coords)
            # re-align the window labels when centering was requested
            return self._center_result(result) if self.center else result
        return wrapped_func


class AbstractArray(ImplementsArrayReduce, formatting.ReprMixin):
"""Shared base class for DataArray and Variable."""

Expand Down Expand Up @@ -449,7 +392,7 @@ def groupby(self, group, squeeze=True):
A `GroupBy` object patterned after `pandas.GroupBy` that can be
iterated over in the form of `(unique_value, grouped_array)` pairs.
"""
return self.groupby_cls(self, group, squeeze=squeeze)
return self._groupby_cls(self, group, squeeze=squeeze)

def groupby_bins(self, group, bins, right=True, labels=None, precision=3,
include_lowest=False, squeeze=True):
Expand Down Expand Up @@ -498,10 +441,10 @@ def groupby_bins(self, group, bins, right=True, labels=None, precision=3,
----------
.. [1] http://pandas.pydata.org/pandas-docs/stable/generated/pandas.cut.html
"""
return self.groupby_cls(self, group, squeeze=squeeze, bins=bins,
cut_kwargs={'right': right, 'labels': labels,
'precision': precision,
'include_lowest': include_lowest})
return self._groupby_cls(self, group, squeeze=squeeze, bins=bins,
cut_kwargs={'right': right, 'labels': labels,
'precision': precision,
'include_lowest': include_lowest})

def rolling(self, min_periods=None, center=False, **windows):
"""
Expand Down Expand Up @@ -530,8 +473,8 @@ def rolling(self, min_periods=None, center=False, **windows):
rolling : type of input argument
"""

return self.rolling_cls(self, min_periods=min_periods,
center=center, **windows)
return self._rolling_cls(self, min_periods=min_periods,
center=center, **windows)

def resample(self, freq, dim, how='mean', skipna=None, closed=None,
label=None, base=0, keep_attrs=False):
Expand Down Expand Up @@ -601,7 +544,7 @@ def resample(self, freq, dim, how='mean', skipna=None, closed=None,
group = DataArray(dim, [(RESAMPLE_DIM, dim)], name=RESAMPLE_DIM)
time_grouper = pd.TimeGrouper(freq=freq, how=how, closed=closed,
label=label, base=base)
gb = self.groupby_cls(self, group, grouper=time_grouper)
gb = self._groupby_cls(self, group, grouper=time_grouper)
if isinstance(how, basestring):
f = getattr(gb, how)
if how in ['first', 'last']:
Expand Down
4 changes: 2 additions & 2 deletions xarray/core/dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,8 +155,8 @@ class DataArray(AbstractArray, BaseDataObject):
attrs : OrderedDict
Dictionary for holding arbitrary metadata.
"""
groupby_cls = groupby.DataArrayGroupBy
rolling_cls = rolling.DataArrayRolling
_groupby_cls = groupby.DataArrayGroupBy
_rolling_cls = rolling.DataArrayRolling

def __init__(self, data, coords=None, dims=None, name=None,
attrs=None, encoding=None, fastpath=False):
Expand Down
4 changes: 3 additions & 1 deletion xarray/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from . import ops
from . import utils
from . import groupby
from . import rolling
from . import indexing
from . import alignment
from . import formatting
Expand Down Expand Up @@ -306,7 +307,8 @@ class Dataset(Mapping, ImplementsDatasetReduce, BaseDataObject,
One dimensional variables with name equal to their dimension are index
coordinates used for label based indexing.
"""
groupby_cls = groupby.DatasetGroupBy
_groupby_cls = groupby.DatasetGroupBy
_rolling_cls = rolling.DatasetRolling

def __init__(self, data_vars=None, coords=None, attrs=None,
compat='broadcast_equals'):
Expand Down
23 changes: 17 additions & 6 deletions xarray/core/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,7 +259,7 @@ def func(self, *args, **kwargs):
"""

_ROLLING_REDUCE_DOCSTRING_TEMPLATE = \
"""Reduce this DataArrayRolling's data windows by applying `{name}`
"""Reduce this {da_or_ds}'s data windows by applying `{name}`
along its dimension.
Parameters
Expand All @@ -269,8 +269,8 @@ def func(self, *args, **kwargs):
Returns
-------
reduced : DataArray
New DataArray object with `{name}` applied along its rolling dimnension.
reduced : {da_or_ds}
New {da_or_ds} object with `{name}` applied along its rolling dimnension.
"""


Expand Down Expand Up @@ -522,7 +522,7 @@ def inject_bottleneck_rolling_methods(cls):
func = cls._reduce_method(f)
func.__name__ = name
func.__doc__ = _ROLLING_REDUCE_DOCSTRING_TEMPLATE.format(
name=func.__name__)
name=func.__name__, da_or_ds='DataArray')
setattr(cls, name, func)

# bottleneck rolling methods
Expand All @@ -541,7 +541,7 @@ def inject_bottleneck_rolling_methods(cls):
func = cls._bottleneck_reduce(f)
func.__name__ = method_name
func.__doc__ = _ROLLING_REDUCE_DOCSTRING_TEMPLATE.format(
name=func.__name__)
name=func.__name__, da_or_ds='DataArray')
setattr(cls, method_name, func)
except AttributeError as e:
# skip functions not in Bottleneck 1.0
Expand All @@ -558,5 +558,16 @@ def inject_bottleneck_rolling_methods(cls):
func = cls._bottleneck_reduce_without_min_count(f)
func.__name__ = 'median'
func.__doc__ = _ROLLING_REDUCE_DOCSTRING_TEMPLATE.format(
name=func.__name__)
name=func.__name__, da_or_ds='DataArray')
setattr(cls, 'median', func)


def inject_datasetrolling_methods(cls):
    """Attach the standard numpy-style reduce methods (mean, sum, ...)
    to a DatasetRolling class.

    For each name in ``NAN_REDUCE_METHODS`` the matching module-level
    reducer is wrapped via ``cls._reduce_method`` and installed on
    ``cls`` with a rendered docstring.
    """
    for method_name in NAN_REDUCE_METHODS:
        # look up the nan-aware reducer defined in this module
        reducer = globals()[method_name]
        bound = cls._reduce_method(reducer)
        bound.__name__ = method_name
        bound.__doc__ = _ROLLING_REDUCE_DOCSTRING_TEMPLATE.format(
            name=method_name, da_or_ds='Dataset')
        setattr(cls, method_name, bound)
Loading

0 comments on commit 09ef2c2

Please sign in to comment.