Skip to content

Commit

Permalink
Refactor out coarsen tests (pydata#5474)
Browse files Browse the repository at this point in the history
  • Loading branch information
dcherian authored Jun 21, 2021
1 parent 11f74f1 commit 5381962
Show file tree
Hide file tree
Showing 4 changed files with 303 additions and 291 deletions.
2 changes: 2 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,8 @@ ignore =
E501 # line too long - let black worry about that
E731 # do not assign a lambda expression, use a def
W503 # line break before binary operator
per-file-ignores =
xarray/tests/*.py:F401,F811
exclude=
.eggs
doc
Expand Down
301 changes: 301 additions & 0 deletions xarray/tests/test_coarsen.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,301 @@
import numpy as np
import pandas as pd
import pytest

import xarray as xr
from xarray import DataArray, Dataset, set_options

from . import assert_allclose, assert_equal, has_dask, requires_cftime
from .test_dataarray import da
from .test_dataset import ds


def test_coarsen_absent_dims_error(ds):
    """Coarsening over a dimension the Dataset lacks must raise ``ValueError``."""
    expected_message = r"not found in Dataset."
    with pytest.raises(ValueError, match=expected_message):
        ds.coarsen(foo=2)


@pytest.mark.parametrize("dask", [True, False])
@pytest.mark.parametrize(("boundary", "side"), [("trim", "left"), ("pad", "right")])
def test_coarsen_dataset(ds, dask, boundary, side):
    """Compare Dataset-level ``coarsen(...).max()`` with per-variable coarsening."""
    if dask and has_dask:
        ds = ds.chunk({"x": 4})

    # The same boundary handling is used for every coarsen call below.
    edge_kwargs = {"boundary": boundary, "side": side}
    actual = ds.coarsen(time=2, x=3, **edge_kwargs).max()

    expected_z1 = ds["z1"].coarsen(x=3, **edge_kwargs).max()
    assert_equal(actual["z1"], expected_z1)

    # coordinate should be reduced with the mean by default
    expected_time = ds["time"].coarsen(time=2, **edge_kwargs).mean()
    assert_equal(actual["time"], expected_time)


@pytest.mark.parametrize("dask", [True, False])
def test_coarsen_coords(ds, dask):
    """Check ``coord_func`` handling and the default (exact) boundary behaviour.

    Also smoke-tests coarsening along a pandas datetime coordinate.
    """
    if dask and has_dask:
        ds = ds.chunk({"x": 4})

    # check if coord_func works
    actual = ds.coarsen(time=2, x=3, boundary="trim", coord_func={"time": "max"}).max()
    assert_equal(actual["z1"], ds["z1"].coarsen(x=3, boundary="trim").max())
    assert_equal(actual["time"], ds["time"].coarsen(time=2, boundary="trim").max())

    # raise if exact
    with pytest.raises(ValueError):
        ds.coarsen(x=3).mean()
    # should be no error once the length of x is a multiple of the window
    ds.isel(x=slice(0, 3 * (len(ds["x"]) // 3))).coarsen(x=3).mean()

    # working test with pd.time
    # The start date is written in ISO 8601 form so that it cannot be misread
    # as month-first; the local is not called ``da`` to avoid shadowing the
    # fixture imported at module level.
    daily = xr.DataArray(
        np.linspace(0, 365, num=364),
        dims="time",
        coords={"time": pd.date_range("1999-12-15", periods=364)},
    )
    # Only checks that the reduction runs; the result itself is not inspected.
    daily.coarsen(time=2).mean()


@requires_cftime
def test_coarsen_coords_cftime():
    """Check the coarsened time coordinate of a cftime-indexed DataArray."""
    n_steps = 6
    time_axis = xr.cftime_range("2000", periods=n_steps)
    series = xr.DataArray(range(n_steps), [("time", time_axis)])

    coarsened = series.coarsen(time=3).mean()

    expected_times = xr.cftime_range("2000-01-02", freq="3D", periods=2)
    np.testing.assert_array_equal(coarsened.time, expected_times)


@pytest.mark.parametrize(
    "funcname, argument",
    [
        ("reduce", (np.mean,)),
        ("mean", ()),
    ],
)
def test_coarsen_keep_attrs(funcname, argument):
    """Exercise ``keep_attrs`` handling of Dataset coarsen reductions.

    Covers the default, the ``keep_attrs`` keyword, the global option, and
    the precedence of the keyword over the global option.
    """
    global_attrs = {"units": "test", "long_name": "testing"}
    da_attrs = {"da_attr": "test"}
    attrs_coords = {"attrs_coords": "test"}
    da_not_coarsend_attrs = {"da_not_coarsend_attr": "test"}

    values = np.linspace(10, 15, 100)
    coord_values = np.linspace(1, 10, 100)

    ds = Dataset(
        data_vars={
            "da": ("coord", values, da_attrs),
            "da_not_coarsend": ("no_coord", values, da_not_coarsend_attrs),
        },
        coords={"coord": ("coord", coord_values, attrs_coords)},
        attrs=global_attrs,
    )

    def make_func():
        # Build a fresh coarsen object and look up the reduction under test.
        return getattr(ds.coarsen(dim={"coord": 5}), funcname)

    def check_names(result):
        assert result.da.name == "da"
        assert result.da_not_coarsend.name == "da_not_coarsend"

    def check_attrs_kept(result):
        assert result.attrs == global_attrs
        assert result.da.attrs == da_attrs
        assert result.da_not_coarsend.attrs == da_not_coarsend_attrs
        assert result.coord.attrs == attrs_coords
        check_names(result)

    def check_attrs_dropped(result):
        assert result.attrs == {}
        assert result.da.attrs == {}
        assert result.da_not_coarsend.attrs == {}
        assert result.coord.attrs == {}
        check_names(result)

    # attrs are now kept per default
    func = make_func()
    check_attrs_kept(func(*argument))

    # discard attrs
    func = make_func()
    check_attrs_dropped(func(*argument, keep_attrs=False))

    # test discard attrs using global option
    func = make_func()
    with set_options(keep_attrs=False):
        result = func(*argument)
    check_attrs_dropped(result)

    # keyword takes precedence over global option
    func = make_func()
    with set_options(keep_attrs=False):
        result = func(*argument, keep_attrs=True)
    check_attrs_kept(result)

    func = make_func()
    with set_options(keep_attrs=True):
        result = func(*argument, keep_attrs=False)
    check_attrs_dropped(result)


def test_coarsen_keep_attrs_deprecated():
    """Check the ``FutureWarning`` for ``keep_attrs`` passed to ``Dataset.coarsen``."""
    global_attrs = {"units": "test", "long_name": "testing"}
    attrs_da = {"da_attr": "test"}
    deprecation_message = "Passing ``keep_attrs`` to ``coarsen`` is deprecated"

    ds = Dataset(
        data_vars={"da": ("coord", np.linspace(10, 15, 100))},
        coords={"coord": np.linspace(1, 10, 100)},
        attrs=global_attrs,
    )
    ds.da.attrs = attrs_da

    # deprecated option
    with pytest.warns(FutureWarning, match=deprecation_message):
        result = ds.coarsen(dim={"coord": 5}, keep_attrs=False).mean()

    assert result.attrs == {}
    assert result.da.attrs == {}

    # the keep_attrs in the reduction function takes precedence
    with pytest.warns(FutureWarning, match=deprecation_message):
        coarsened = ds.coarsen(dim={"coord": 5}, keep_attrs=True)
        result = coarsened.mean(keep_attrs=False)

    assert result.attrs == {}
    assert result.da.attrs == {}


@pytest.mark.slow
@pytest.mark.parametrize("ds", (1, 2), indirect=True)
@pytest.mark.parametrize("window", (1, 2, 3, 4))
@pytest.mark.parametrize("name", ("sum", "mean", "std", "var", "min", "max", "median"))
def test_coarsen_reduce(ds, window, name):
    """Compare ``reduce`` with a nan-prefixed numpy function to the named reduction."""
    # Use boundary="trim" to accommodate all window sizes used in tests
    coarsened = ds.coarsen(time=window, boundary="trim")

    # add nan prefix to numpy methods to get similar behavior as bottleneck
    nan_reducer = getattr(np, f"nan{name}")
    actual = coarsened.reduce(nan_reducer)
    expected = getattr(coarsened, name)()
    assert_allclose(actual, expected)

    # make sure the order of the data variables is not changed
    original_order = list(ds.data_vars.keys())
    assert original_order == list(actual.data_vars.keys())

    # make sure the dimension order is restored
    for var_name, source_var in ds.data_vars.items():
        assert source_var.dims == actual[var_name].dims


@pytest.mark.parametrize(
    "funcname, argument",
    [
        ("reduce", (np.mean,)),
        ("mean", ()),
    ],
)
def test_coarsen_da_keep_attrs(funcname, argument):
    """Exercise ``keep_attrs`` handling of DataArray coarsen reductions.

    Every scenario checks the attrs of the result, the attrs of the result's
    ``coord`` coordinate, and that the array name is preserved. The coordinate
    checks are real assertions on ``result``, mirroring the expectations of
    the Dataset test ``test_coarsen_keep_attrs`` in this module.
    """
    attrs_da = {"da_attr": "test"}
    attrs_coords = {"attrs_coords": "test"}

    data = np.linspace(10, 15, 100)
    coords = np.linspace(1, 10, 100)

    da = DataArray(
        data,
        dims="coord",
        coords={"coord": ("coord", coords, attrs_coords)},
        attrs=attrs_da,
        name="name",
    )

    # attrs are now kept per default
    func = getattr(da.coarsen(dim={"coord": 5}), funcname)
    result = func(*argument)
    assert result.attrs == attrs_da
    assert result.coord.attrs == attrs_coords
    assert result.name == "name"

    # discard attrs
    func = getattr(da.coarsen(dim={"coord": 5}), funcname)
    result = func(*argument, keep_attrs=False)
    assert result.attrs == {}
    assert result.coord.attrs == {}
    assert result.name == "name"

    # test discard attrs using global option
    func = getattr(da.coarsen(dim={"coord": 5}), funcname)
    with set_options(keep_attrs=False):
        result = func(*argument)
    assert result.attrs == {}
    assert result.coord.attrs == {}
    assert result.name == "name"

    # keyword takes precedence over global option
    func = getattr(da.coarsen(dim={"coord": 5}), funcname)
    with set_options(keep_attrs=False):
        result = func(*argument, keep_attrs=True)
    assert result.attrs == attrs_da
    # keep_attrs=True wins over the global option, so coordinate attrs
    # are expected to survive as well
    assert result.coord.attrs == attrs_coords
    assert result.name == "name"

    func = getattr(da.coarsen(dim={"coord": 5}), funcname)
    with set_options(keep_attrs=True):
        result = func(*argument, keep_attrs=False)
    assert result.attrs == {}
    assert result.coord.attrs == {}
    assert result.name == "name"


def test_coarsen_da_keep_attrs_deprecated():
    """Check the ``FutureWarning`` for ``keep_attrs`` passed to ``DataArray.coarsen``."""
    attrs_da = {"da_attr": "test"}
    deprecation_message = "Passing ``keep_attrs`` to ``coarsen`` is deprecated"

    da = DataArray(
        np.linspace(10, 15, 100),
        dims=("coord"),
        coords={"coord": np.linspace(1, 10, 100)},
        attrs=attrs_da,
    )

    # deprecated option
    with pytest.warns(FutureWarning, match=deprecation_message):
        result = da.coarsen(dim={"coord": 5}, keep_attrs=False).mean()

    assert result.attrs == {}

    # the keep_attrs in the reduction function takes precedence
    with pytest.warns(FutureWarning, match=deprecation_message):
        coarsened = da.coarsen(dim={"coord": 5}, keep_attrs=True)
        result = coarsened.mean(keep_attrs=False)

    assert result.attrs == {}


@pytest.mark.parametrize("da", (1, 2), indirect=True)
@pytest.mark.parametrize("window", (1, 2, 3, 4))
@pytest.mark.parametrize("name", ("sum", "mean", "std", "max"))
def test_coarsen_da_reduce(da, window, name):
    """Compare ``reduce`` with a nan-prefixed numpy function to the named reduction."""
    has_several_nans = da.isnull().sum() > 1
    if has_several_nans and window == 1:
        pytest.skip("These parameters lead to all-NaN slices")

    # Use boundary="trim" to accommodate all window sizes used in tests
    coarsened = da.coarsen(time=window, boundary="trim")

    # add nan prefix to numpy methods to get similar behavior as bottleneck
    nan_reducer = getattr(np, f"nan{name}")
    actual = coarsened.reduce(nan_reducer)
    expected = getattr(coarsened, name)()
    assert_allclose(actual, expected)
Loading

0 comments on commit 5381962

Please sign in to comment.