Skip to content

Commit

Permalink
Added support for Dataset.rolling. (pydata#1289)
Browse files Browse the repository at this point in the history
* Added support for Dataset.rolling

* Fixed an accidental bug in test_dataset.py

* Dimension order is restored after Dataset.rolling.

* Add some docstrings. Code clean up.

* Make rolling_cls and groupby_cls private.

* Removed another old-style call for the superclass method.

* Fixed some breaks made by the previous merge.

* Moved ImplementedArrayRolling into rolling

* Made DatasetRolling utilize DataArrayRolling of each DataArrays.

* Added 1 line to api.rst.

* Updating test_dataset to catch pydata#1278

* Test_dataset now tests bottleneck.move_var.

* Raises an exception if no data_vars depend on the rolling-dims.

* Fixed style issue and added docstring for inject_datasetrolling_methods

* Recover unintended change.

* An empty commit to trigger TravisCI rebuild.

* Recover test_assign_attrs, which was deleted accidentally.
Some improvements based on shoyer's comments.

* An empty commit to trigger Travis's rebuild
  • Loading branch information
fujiisoup authored and shoyer committed Mar 31, 2017
1 parent f2a5015 commit 09ef2c2
Show file tree
Hide file tree
Showing 9 changed files with 403 additions and 93 deletions.
1 change: 1 addition & 0 deletions doc/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@ Computation
Dataset.reduce
Dataset.groupby
Dataset.groupby_bins
Dataset.rolling
Dataset.resample
Dataset.diff
Dataset.quantile
Expand Down
2 changes: 2 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ v0.9.2 (unreleased)

Enhancements
~~~~~~~~~~~~
- ``rolling`` on Dataset is now supported (:issue:`859`).
By `Keisuke Fujii <https://github.com/fujiisoup>`_.
- When bottleneck version 1.1 or later is installed, use bottleneck for rolling
`var`, `argmin`, `argmax`, and `rank` computations. Also, `rolling.median`
now also accepts a `min_periods` argument (:issue:`1276`).
Expand Down
73 changes: 8 additions & 65 deletions xarray/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,63 +69,6 @@ def wrapped_func(self, dim=None, keep_attrs=False, **kwargs):
and 'axis' arguments can be supplied."""


class ImplementsRollingArrayReduce(object):
    """Mixin providing factory classmethods that build rolling-window
    reduction methods, both generic and bottleneck-accelerated."""

    @classmethod
    def _reduce_method(cls, func):
        """Build a method that reduces the rolling windows with ``func``.

        The returned function simply delegates to ``self.reduce``.
        """
        def wrapped_func(self, **kwargs):
            return self.reduce(func, **kwargs)
        return wrapped_func

    @classmethod
    def _bottleneck_reduce(cls, func):
        """Build a method that applies the bottleneck moving-window
        function ``func`` along the rolling dimension."""
        def wrapped_func(self, **kwargs):
            from .dataarray import DataArray

            if isinstance(self.obj.data, dask_array_type):
                raise NotImplementedError(
                    'Rolling window operation does not work with dask arrays')

            # bottleneck rejects min_count=0, but min_count=1 gives the
            # same result, so substitute it
            min_count = 1 if self.min_periods == 0 else self.min_periods

            values = func(self.obj.data, window=self.window,
                          min_count=min_count, axis=self._axis_num)
            result = DataArray(values, self.obj.coords)
            # re-align the window labels when centering was requested
            return self._center_result(result) if self.center else result
        return wrapped_func

    @classmethod
    def _bottleneck_reduce_without_min_count(cls, func):
        """Build a method for a bottleneck moving-window function that
        does not support a ``min_count`` argument (e.g. move_median)."""
        def wrapped_func(self, **kwargs):
            from .dataarray import DataArray

            if self.min_periods is not None:
                raise ValueError('Rolling.median does not accept min_periods')

            if isinstance(self.obj.data, dask_array_type):
                raise NotImplementedError(
                    'Rolling window operation does not work with dask arrays')

            values = func(self.obj.data, window=self.window,
                          axis=self._axis_num)
            result = DataArray(values, self.obj.coords)
            # re-align the window labels when centering was requested
            return self._center_result(result) if self.center else result
        return wrapped_func


class AbstractArray(ImplementsArrayReduce, formatting.ReprMixin):
"""Shared base class for DataArray and Variable."""

Expand Down Expand Up @@ -449,7 +392,7 @@ def groupby(self, group, squeeze=True):
A `GroupBy` object patterned after `pandas.GroupBy` that can be
iterated over in the form of `(unique_value, grouped_array)` pairs.
"""
return self.groupby_cls(self, group, squeeze=squeeze)
return self._groupby_cls(self, group, squeeze=squeeze)

def groupby_bins(self, group, bins, right=True, labels=None, precision=3,
include_lowest=False, squeeze=True):
Expand Down Expand Up @@ -498,10 +441,10 @@ def groupby_bins(self, group, bins, right=True, labels=None, precision=3,
----------
.. [1] http://pandas.pydata.org/pandas-docs/stable/generated/pandas.cut.html
"""
return self.groupby_cls(self, group, squeeze=squeeze, bins=bins,
cut_kwargs={'right': right, 'labels': labels,
'precision': precision,
'include_lowest': include_lowest})
return self._groupby_cls(self, group, squeeze=squeeze, bins=bins,
cut_kwargs={'right': right, 'labels': labels,
'precision': precision,
'include_lowest': include_lowest})

def rolling(self, min_periods=None, center=False, **windows):
"""
Expand Down Expand Up @@ -530,8 +473,8 @@ def rolling(self, min_periods=None, center=False, **windows):
rolling : type of input argument
"""

return self.rolling_cls(self, min_periods=min_periods,
center=center, **windows)
return self._rolling_cls(self, min_periods=min_periods,
center=center, **windows)

def resample(self, freq, dim, how='mean', skipna=None, closed=None,
label=None, base=0, keep_attrs=False):
Expand Down Expand Up @@ -601,7 +544,7 @@ def resample(self, freq, dim, how='mean', skipna=None, closed=None,
group = DataArray(dim, [(RESAMPLE_DIM, dim)], name=RESAMPLE_DIM)
time_grouper = pd.TimeGrouper(freq=freq, how=how, closed=closed,
label=label, base=base)
gb = self.groupby_cls(self, group, grouper=time_grouper)
gb = self._groupby_cls(self, group, grouper=time_grouper)
if isinstance(how, basestring):
f = getattr(gb, how)
if how in ['first', 'last']:
Expand Down
4 changes: 2 additions & 2 deletions xarray/core/dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,8 +155,8 @@ class DataArray(AbstractArray, BaseDataObject):
attrs : OrderedDict
Dictionary for holding arbitrary metadata.
"""
groupby_cls = groupby.DataArrayGroupBy
rolling_cls = rolling.DataArrayRolling
_groupby_cls = groupby.DataArrayGroupBy
_rolling_cls = rolling.DataArrayRolling

def __init__(self, data, coords=None, dims=None, name=None,
attrs=None, encoding=None, fastpath=False):
Expand Down
4 changes: 3 additions & 1 deletion xarray/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from . import ops
from . import utils
from . import groupby
from . import rolling
from . import indexing
from . import alignment
from . import formatting
Expand Down Expand Up @@ -306,7 +307,8 @@ class Dataset(Mapping, ImplementsDatasetReduce, BaseDataObject,
One dimensional variables with name equal to their dimension are index
coordinates used for label based indexing.
"""
groupby_cls = groupby.DatasetGroupBy
_groupby_cls = groupby.DatasetGroupBy
_rolling_cls = rolling.DatasetRolling

def __init__(self, data_vars=None, coords=None, attrs=None,
compat='broadcast_equals'):
Expand Down
23 changes: 17 additions & 6 deletions xarray/core/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,7 +259,7 @@ def func(self, *args, **kwargs):
"""

_ROLLING_REDUCE_DOCSTRING_TEMPLATE = \
"""Reduce this DataArrayRolling's data windows by applying `{name}`
"""Reduce this {da_or_ds}'s data windows by applying `{name}`
along its dimension.
Parameters
Expand All @@ -269,8 +269,8 @@ def func(self, *args, **kwargs):
Returns
-------
reduced : DataArray
New DataArray object with `{name}` applied along its rolling dimnension.
reduced : {da_or_ds}
New {da_or_ds} object with `{name}` applied along its rolling dimnension.
"""


Expand Down Expand Up @@ -522,7 +522,7 @@ def inject_bottleneck_rolling_methods(cls):
func = cls._reduce_method(f)
func.__name__ = name
func.__doc__ = _ROLLING_REDUCE_DOCSTRING_TEMPLATE.format(
name=func.__name__)
name=func.__name__, da_or_ds='DataArray')
setattr(cls, name, func)

# bottleneck rolling methods
Expand All @@ -541,7 +541,7 @@ def inject_bottleneck_rolling_methods(cls):
func = cls._bottleneck_reduce(f)
func.__name__ = method_name
func.__doc__ = _ROLLING_REDUCE_DOCSTRING_TEMPLATE.format(
name=func.__name__)
name=func.__name__, da_or_ds='DataArray')
setattr(cls, method_name, func)
except AttributeError as e:
# skip functions not in Bottleneck 1.0
Expand All @@ -558,5 +558,16 @@ def inject_bottleneck_rolling_methods(cls):
func = cls._bottleneck_reduce_without_min_count(f)
func.__name__ = 'median'
func.__doc__ = _ROLLING_REDUCE_DOCSTRING_TEMPLATE.format(
name=func.__name__)
name=func.__name__, da_or_ds='DataArray')
setattr(cls, 'median', func)


def inject_datasetrolling_methods(cls):
    """Attach the standard numpy-style reduce methods (mean, sum, ...)
    to a DatasetRolling class.

    For each name in ``NAN_REDUCE_METHODS`` the matching module-level
    reducer is wrapped via ``cls._reduce_method`` and installed on
    ``cls`` with a rendered docstring.
    """
    for method_name in NAN_REDUCE_METHODS:
        # look up the nan-aware reducer defined in this module
        reducer = globals()[method_name]
        bound = cls._reduce_method(reducer)
        bound.__name__ = method_name
        bound.__doc__ = _ROLLING_REDUCE_DOCSTRING_TEMPLATE.format(
            name=method_name, da_or_ds='Dataset')
        setattr(cls, method_name, bound)
Loading

0 comments on commit 09ef2c2

Please sign in to comment.