depr(python): Rename DataFrame.apply to map_rows (pola-rs#10797)

bfeif · Aug 30, 2023 · 43ab40d · 43ab40d
1 parent fe9ff50
commit 43ab40d
Show file tree

Hide file tree

Showing 5 changed files with 112 additions and 55 deletions.
diff --git a/py-polars/polars/dataframe/frame.py b/py-polars/polars/dataframe/frame.py
@@ -5975,7 +5975,7 @@ def join(
             .collect(no_optimization=True)
         )
 
-    def apply(
+    def map_rows(
         self,
         function: Callable[[tuple[Any, ...]], Any],
         return_dtype: PolarsDataType | None = None,
@@ -6030,7 +6030,7 @@ def apply(
 
         Return a DataFrame by mapping each row to a tuple:
 
-        >>> df.apply(lambda t: (t[0] * 2, t[1] * 3))
+        >>> df.map_rows(lambda t: (t[0] * 2, t[1] * 3))
         shape: (3, 2)
         ┌──────────┬──────────┐
         │ column_0 ┆ column_1 │
@@ -6051,7 +6051,7 @@ def apply(
 
         Return a DataFrame with a single column by mapping each row to a scalar:
 
-        >>> df.apply(lambda t: (t[0] * 2 + t[1]))  # doctest: +SKIP
+        >>> df.map_rows(lambda t: (t[0] * 2 + t[1]))  # doctest: +SKIP
         shape: (3, 1)
         ┌───────┐
         │ apply │
@@ -6069,10 +6069,10 @@ def apply(
 
         """
         # TODO:
-        # from polars.utils.udfs import warn_on_inefficient_apply
-        # warn_on_inefficient_apply(function, columns=self.columns, apply_target="frame)
+        # from polars.utils.udfs import warn_on_inefficient_map
+        # warn_on_inefficient_map(function, columns=self.columns, map_target="frame")
 
-        out, is_df = self._df.apply(function, return_dtype, inference_size)
+        out, is_df = self._df.map_rows(function, return_dtype, inference_size)
         if is_df:
             return self._from_pydf(out)
         else:
@@ -9908,6 +9908,33 @@ def groupby_dynamic(
             check_sorted=check_sorted,
         )
 
+    @deprecate_renamed_function("map_rows", version="0.19.0")
+    def apply(
+        self,
+        function: Callable[[tuple[Any, ...]], Any],
+        return_dtype: PolarsDataType | None = None,
+        *,
+        inference_size: int = 256,
+    ) -> DataFrame:
+        """
+        Apply a custom/user-defined function (UDF) over the rows of the DataFrame.
+
+        .. deprecated:: 0.19.0
+            This method has been renamed to :func:`DataFrame.map_rows`.
+
+        Parameters
+        ----------
+        function
+            Custom function or lambda.
+        return_dtype
+            Output type of the operation. If none given, Polars tries to infer the type.
+        inference_size
+            Only used in the case when the custom function returns rows.
+            This uses the first `n` rows to determine the output schema.
+
+        """
+        return self.map_rows(function, return_dtype, inference_size=inference_size)
+
 
 def _prepare_other_arg(other: Any, length: int | None = None) -> Series:
     # if not a series create singleton series such that it will broadcast

diff --git a/py-polars/src/dataframe.rs b/py-polars/src/dataframe.rs
@@ -1349,7 +1349,7 @@ impl PyDataFrame {
     }
 
     #[pyo3(signature = (lambda, output_type, inference_size))]
-    pub fn apply(
+    pub fn map_rows(
         &mut self,
         lambda: &PyAny,
         output_type: Option<Wrap<DataType>>,

diff --git a/py-polars/tests/unit/dataframe/test_df.py b/py-polars/tests/unit/dataframe/test_df.py
@@ -942,12 +942,6 @@ def test_fold_filter() -> None:
     assert out.rows() == [(1, 0), (2, 1), (3, 2)]
 
 
-def test_df_apply() -> None:
-    df = pl.DataFrame({"a": ["foo", "bar", "2"], "b": [1, 2, 3], "c": [1.0, 2.0, 3.0]})
-    out = df.apply(lambda x: len(x), None).to_series()
-    assert out.sum() == 9
-
-
 def test_column_names() -> None:
     tbl = pa.table(
         {
@@ -1963,27 +1957,6 @@ def test_slicing() -> None:
     )
 
 
-def test_apply_list_return() -> None:
-    df = pl.DataFrame({"start": [1, 2], "end": [3, 5]})
-    out = df.apply(lambda r: pl.Series(range(r[0], r[1] + 1))).to_series()
-    assert out.to_list() == [[1, 2, 3], [2, 3, 4, 5]]
-
-
-def test_apply_dataframe_return() -> None:
-    df = pl.DataFrame({"a": [1, 2, 3], "b": ["c", "d", None]})
-
-    out = df.apply(lambda row: (row[0] * 10, "foo", True, row[-1]))
-    expected = pl.DataFrame(
-        {
-            "column_0": [10, 20, 30],
-            "column_1": ["foo", "foo", "foo"],
-            "column_2": [True, True, True],
-            "column_3": ["c", "d", None],
-        }
-    )
-    assert_frame_equal(out, expected)
-
-
 def test_group_by_cat_list() -> None:
     grouped = (
         pl.DataFrame(

diff --git a/py-polars/tests/unit/operations/test_map.py b/py-polars/tests/unit/operations/map/test_map.py
@@ -361,27 +361,6 @@ def test_map_elements_set_datetime_output_8984() -> None:
     )["a"].to_list() == [payload]
 
 
-def test_err_df_apply_return_type() -> None:
-    df = pl.DataFrame({"a": [[1, 2], [2, 3]], "b": [[4, 5], [6, 7]]})
-
-    def cmb(row: tuple[Any, ...]) -> list[Any]:
-        res = [x + y for x, y in zip(row[0], row[1])]
-        return [res]
-
-    with pytest.raises(pl.ComputeError, match="expected tuple, got list"):
-        df.apply(cmb)
-
-
-def test_apply_shifted_chunks() -> None:
-    df = pl.DataFrame(pl.Series("texts", ["test", "test123", "tests"]))
-    assert df.select(
-        pl.col("texts"), pl.col("texts").shift(1).alias("texts_shifted")
-    ).apply(lambda x: x).to_dict(False) == {
-        "column_0": ["test", "test123", "tests"],
-        "column_1": [None, "test", "test123"],
-    }
-
-
 def test_map_elements_dict_order_10128() -> None:
     df = pl.select(pl.lit("").map_elements(lambda x: {"c": 1, "b": 2, "a": 3}))
     assert df.to_dict(False) == {"literal": [{"c": 1, "b": 2, "a": 3}]}

diff --git a/py-polars/tests/unit/operations/map/test_map_rows.py b/py-polars/tests/unit/operations/map/test_map_rows.py
@@ -0,0 +1,78 @@
+from __future__ import annotations
+
+from typing import Any
+
+import pytest
+
+import polars as pl
+from polars.testing import assert_frame_equal
+
+
+def test_map_rows() -> None:
+    df = pl.DataFrame({"a": ["foo", "2"], "b": [1, 2], "c": [1.0, 2.0]})
+
+    result = df.map_rows(lambda x: len(x), None)
+
+    expected = pl.DataFrame({"apply": [3, 3]})
+    assert_frame_equal(result, expected)
+
+
+def test_map_rows_list_return() -> None:
+    df = pl.DataFrame({"start": [1, 2], "end": [3, 5]})
+
+    result = df.map_rows(lambda r: pl.Series(range(r[0], r[1] + 1)))
+
+    expected = pl.DataFrame({"apply": [[1, 2, 3], [2, 3, 4, 5]]})
+    assert_frame_equal(result, expected)
+
+
+def test_map_rows_dataframe_return() -> None:
+    df = pl.DataFrame({"a": [1, 2, 3], "b": ["c", "d", None]})
+
+    result = df.map_rows(lambda row: (row[0] * 10, "foo", True, row[-1]))
+
+    expected = pl.DataFrame(
+        {
+            "column_0": [10, 20, 30],
+            "column_1": ["foo", "foo", "foo"],
+            "column_2": [True, True, True],
+            "column_3": ["c", "d", None],
+        }
+    )
+    assert_frame_equal(result, expected)
+
+
+def test_map_rows_error_return_type() -> None:
+    df = pl.DataFrame({"a": [[1, 2], [2, 3]], "b": [[4, 5], [6, 7]]})
+
+    def combine(row: tuple[Any, ...]) -> list[Any]:
+        res = [x + y for x, y in zip(row[0], row[1])]
+        return [res]
+
+    with pytest.raises(pl.ComputeError, match="expected tuple, got list"):
+        df.map_rows(combine)
+
+
+def test_map_rows_shifted_chunks() -> None:
+    df = pl.DataFrame(pl.Series("texts", ["test", "test123", "tests"]))
+    df = df.select(pl.col("texts"), pl.col("texts").shift(1).alias("texts_shifted"))
+
+    result = df.map_rows(lambda x: x)
+
+    expected = pl.DataFrame(
+        {
+            "column_0": ["test", "test123", "tests"],
+            "column_1": [None, "test", "test123"],
+        }
+    )
+    assert_frame_equal(result, expected)
+
+
+def test_apply_deprecated() -> None:
+    df = pl.DataFrame({"a": ["foo", "2"], "b": [1, 2], "c": [1.0, 2.0]})
+
+    with pytest.deprecated_call():
+        result = df.apply(lambda x: len(x), None)
+
+    expected = pl.DataFrame({"apply": [3, 3]})
+    assert_frame_equal(result, expected)