From 70c85340525dd3cce314bd498b1ae9e2c0482f00 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Paul=20Ockenfu=C3=9F?= <42680748+Ockenfuss@users.noreply.github.com> Date: Thu, 16 Mar 2023 19:55:56 +0100 Subject: [PATCH] Fix missing 'dim' argument in _get_nan_block_lengths (#7598) * Fix missing 'dim' argument in _get_nan_block_lengths * Add missing dim argument (GH7597) * Append a nan gap at the end of existing test cases * Explicitly call 'dim' by keyword * Update Whats-new.rst * Preserve one row with valid values at the beginning * Update xarray/core/missing.py --------- Co-authored-by: Paul Ockenfuss Co-authored-by: Deepak Cherian --- doc/whats-new.rst | 3 ++ xarray/core/missing.py | 2 +- xarray/tests/test_missing.py | 63 ++++++++++++++++++++++++------------ 3 files changed, 46 insertions(+), 22 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index cdbb3335372..db9950c72d9 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -57,6 +57,9 @@ Bug fixes - Fix matplotlib raising a UserWarning when plotting a scatter plot with an unfilled marker (:issue:`7313`, :pull:`7318`). By `Jimmy Westling `_. +- Fix issue with ``max_gap`` in ``interpolate_na`` when applied to + multidimensional arrays (:issue:`7597`, :pull:`7598`). + By `Paul Ockenfuß <https://github.com/Ockenfuss>`_. - Improved performance in ``open_dataset`` for datasets with large object arrays (:issue:`7484`, :pull:`7494`). By `Alex Goodman `_ and `Deepak Cherian `_. - Fix :py:meth:`DataArray.plot.pcolormesh` which now works if one of the coordinates has str dtype (:issue:`6775`, :pull:`7612`). 
diff --git a/xarray/core/missing.py b/xarray/core/missing.py index 1caa79a7dfd..d7f0be5fa08 100644 --- a/xarray/core/missing.py +++ b/xarray/core/missing.py @@ -48,7 +48,7 @@ def _get_nan_block_lengths( .where(valid) .bfill(dim=dim) .where(~valid, 0) - .fillna(index[-1] - valid_arange.max()) + .fillna(index[-1] - valid_arange.max(dim=[dim])) ) return nan_block_lengths diff --git a/xarray/tests/test_missing.py b/xarray/tests/test_missing.py index 83ed1aace2f..a6b6b1f80ce 100644 --- a/xarray/tests/test_missing.py +++ b/xarray/tests/test_missing.py @@ -542,19 +542,28 @@ def test_bfill_dataset(ds): @requires_bottleneck @pytest.mark.parametrize( - "y, lengths", + "y, lengths_expected", [ - [np.arange(9), [[3, 3, 3, 0, 3, 3, 0, 2, 2]]], - [np.arange(9) * 3, [[9, 9, 9, 0, 9, 9, 0, 6, 6]]], - [[0, 2, 5, 6, 7, 8, 10, 12, 14], [[6, 6, 6, 0, 4, 4, 0, 4, 4]]], + [np.arange(9), [[1, 0, 7, 7, 7, 7, 7, 7, 0], [3, 3, 3, 0, 3, 3, 0, 2, 2]]], + [ + np.arange(9) * 3, + [[3, 0, 21, 21, 21, 21, 21, 21, 0], [9, 9, 9, 0, 9, 9, 0, 6, 6]], + ], + [ + [0, 2, 5, 6, 7, 8, 10, 12, 14], + [[2, 0, 12, 12, 12, 12, 12, 12, 0], [6, 6, 6, 0, 4, 4, 0, 4, 4]], + ], ], ) -def test_interpolate_na_nan_block_lengths(y, lengths): - arr = [[np.nan, np.nan, np.nan, 1, np.nan, np.nan, 4, np.nan, np.nan]] - da = xr.DataArray(arr * 2, dims=["x", "y"], coords={"x": [0, 1], "y": y}) +def test_interpolate_na_nan_block_lengths(y, lengths_expected): + arr = [ + [np.nan, 1, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, 4], + [np.nan, np.nan, np.nan, 1, np.nan, np.nan, 4, np.nan, np.nan], + ] + da = xr.DataArray(arr, dims=["x", "y"], coords={"x": [0, 1], "y": y}) index = get_clean_interp_index(da, dim="y", use_coordinate=True) actual = _get_nan_block_lengths(da, dim="y", index=index) - expected = da.copy(data=lengths * 2) + expected = da.copy(data=lengths_expected) assert_equal(actual, expected) @@ -660,16 +669,17 @@ def test_interpolate_na_max_gap_time_specifier( "coords", [ pytest.param(None, 
marks=pytest.mark.xfail()), - {"x": np.arange(4), "y": np.arange(11)}, + {"x": np.arange(4), "y": np.arange(12)}, ], ) def test_interpolate_na_2d(coords): + n = np.nan da = xr.DataArray( [ - [1, 2, 3, 4, np.nan, 6, 7, np.nan, np.nan, np.nan, 11], - [1, 2, 3, np.nan, np.nan, 6, 7, np.nan, np.nan, np.nan, 11], - [1, 2, 3, np.nan, np.nan, 6, 7, np.nan, np.nan, np.nan, 11], - [1, 2, 3, 4, np.nan, 6, 7, np.nan, np.nan, np.nan, 11], + [1, 2, 3, 4, n, 6, n, n, n, 10, 11, n], + [n, n, 3, n, n, 6, n, n, n, 10, n, n], + [n, n, 3, n, n, 6, n, n, n, 10, n, n], + [n, 2, 3, 4, n, 6, n, n, n, 10, 11, n], ], dims=["x", "y"], coords=coords, @@ -678,21 +688,32 @@ def test_interpolate_na_2d(coords): actual = da.interpolate_na("y", max_gap=2) expected_y = da.copy( data=[ - [1, 2, 3, 4, 5, 6, 7, np.nan, np.nan, np.nan, 11], - [1, 2, 3, np.nan, np.nan, 6, 7, np.nan, np.nan, np.nan, 11], - [1, 2, 3, np.nan, np.nan, 6, 7, np.nan, np.nan, np.nan, 11], - [1, 2, 3, 4, 5, 6, 7, np.nan, np.nan, np.nan, 11], + [1, 2, 3, 4, 5, 6, n, n, n, 10, 11, n], + [n, n, 3, n, n, 6, n, n, n, 10, n, n], + [n, n, 3, n, n, 6, n, n, n, 10, n, n], + [n, 2, 3, 4, 5, 6, n, n, n, 10, 11, n], ] ) assert_equal(actual, expected_y) + actual = da.interpolate_na("y", max_gap=1, fill_value="extrapolate") + expected_y_extra = da.copy( + data=[ + [1, 2, 3, 4, n, 6, n, n, n, 10, 11, 12], + [n, n, 3, n, n, 6, n, n, n, 10, n, n], + [n, n, 3, n, n, 6, n, n, n, 10, n, n], + [1, 2, 3, 4, n, 6, n, n, n, 10, 11, 12], + ] + ) + assert_equal(actual, expected_y_extra) + actual = da.interpolate_na("x", max_gap=3) expected_x = xr.DataArray( [ - [1, 2, 3, 4, np.nan, 6, 7, np.nan, np.nan, np.nan, 11], - [1, 2, 3, 4, np.nan, 6, 7, np.nan, np.nan, np.nan, 11], - [1, 2, 3, 4, np.nan, 6, 7, np.nan, np.nan, np.nan, 11], - [1, 2, 3, 4, np.nan, 6, 7, np.nan, np.nan, np.nan, 11], + [1, 2, 3, 4, n, 6, n, n, n, 10, 11, n], + [n, 2, 3, 4, n, 6, n, n, n, 10, 11, n], + [n, 2, 3, 4, n, 6, n, n, n, 10, 11, n], + [n, 2, 3, 4, n, 6, n, n, n, 10, 11, n], 
], dims=["x", "y"], coords=coords,