Skip to content

Commit

Permalink
depr(python): Rename DataFrame.apply to map_rows (pola-rs#10797)
Browse files Browse the repository at this point in the history
  • Loading branch information
stinodego authored Aug 30, 2023
1 parent fe9ff50 commit 43ab40d
Show file tree
Hide file tree
Showing 5 changed files with 112 additions and 55 deletions.
39 changes: 33 additions & 6 deletions py-polars/polars/dataframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -5975,7 +5975,7 @@ def join(
.collect(no_optimization=True)
)

def apply(
def map_rows(
self,
function: Callable[[tuple[Any, ...]], Any],
return_dtype: PolarsDataType | None = None,
Expand Down Expand Up @@ -6030,7 +6030,7 @@ def apply(
Return a DataFrame by mapping each row to a tuple:
>>> df.apply(lambda t: (t[0] * 2, t[1] * 3))
>>> df.map_rows(lambda t: (t[0] * 2, t[1] * 3))
shape: (3, 2)
┌──────────┬──────────┐
│ column_0 ┆ column_1 │
Expand All @@ -6051,7 +6051,7 @@ def apply(
Return a DataFrame with a single column by mapping each row to a scalar:
>>> df.apply(lambda t: (t[0] * 2 + t[1])) # doctest: +SKIP
>>> df.map_rows(lambda t: (t[0] * 2 + t[1])) # doctest: +SKIP
shape: (3, 1)
┌───────┐
│ apply │
Expand All @@ -6069,10 +6069,10 @@ def apply(
"""
# TODO:
# from polars.utils.udfs import warn_on_inefficient_apply
# warn_on_inefficient_apply(function, columns=self.columns, apply_target="frame)
# from polars.utils.udfs import warn_on_inefficient_map
# warn_on_inefficient_map(function, columns=self.columns, map_target="frame")

out, is_df = self._df.apply(function, return_dtype, inference_size)
out, is_df = self._df.map_rows(function, return_dtype, inference_size)
if is_df:
return self._from_pydf(out)
else:
Expand Down Expand Up @@ -9908,6 +9908,33 @@ def groupby_dynamic(
check_sorted=check_sorted,
)

@deprecate_renamed_function("map_rows", version="0.19.0")
def apply(
    self,
    function: Callable[[tuple[Any, ...]], Any],
    return_dtype: PolarsDataType | None = None,
    *,
    inference_size: int = 256,
) -> DataFrame:
    """
    Apply a custom/user-defined function (UDF) over the rows of the DataFrame.

    .. deprecated:: 0.19.0
        This method has been renamed to :func:`DataFrame.map_rows`.

    Parameters
    ----------
    function
        Custom function or lambda.
    return_dtype
        Output type of the operation. If none given, Polars tries to infer the type.
    inference_size
        Only used in the case when the custom function returns rows.
        This uses the first `n` rows to determine the output schema.

    Returns
    -------
    DataFrame
        Whatever :func:`DataFrame.map_rows` returns for the same arguments.

    """
    # Thin alias: hand every argument straight to the renamed method.
    mapped = self.map_rows(
        function,
        return_dtype=return_dtype,
        inference_size=inference_size,
    )
    return mapped


def _prepare_other_arg(other: Any, length: int | None = None) -> Series:
# if not a series create singleton series such that it will broadcast
Expand Down
2 changes: 1 addition & 1 deletion py-polars/src/dataframe.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1349,7 +1349,7 @@ impl PyDataFrame {
}

#[pyo3(signature = (lambda, output_type, inference_size))]
pub fn apply(
pub fn map_rows(
&mut self,
lambda: &PyAny,
output_type: Option<Wrap<DataType>>,
Expand Down
27 changes: 0 additions & 27 deletions py-polars/tests/unit/dataframe/test_df.py
Original file line number Diff line number Diff line change
Expand Up @@ -942,12 +942,6 @@ def test_fold_filter() -> None:
assert out.rows() == [(1, 0), (2, 1), (3, 2)]


def test_df_apply() -> None:
    """Map each 3-column row to its length; three rows must sum to 9."""
    frame = pl.DataFrame(
        {
            "a": ["foo", "bar", "2"],
            "b": [1, 2, 3],
            "c": [1.0, 2.0, 3.0],
        }
    )

    row_lengths = frame.apply(lambda row: len(row), None).to_series()

    total = row_lengths.sum()
    assert total == 9


def test_column_names() -> None:
tbl = pa.table(
{
Expand Down Expand Up @@ -1963,27 +1957,6 @@ def test_slicing() -> None:
)


def test_apply_list_return() -> None:
    """Rows mapped to Series come back as one list-valued column."""
    bounds = pl.DataFrame({"start": [1, 2], "end": [3, 5]})

    # Expand each (start, end) row into the inclusive integer range.
    mapped = bounds.apply(lambda row: pl.Series(range(row[0], row[1] + 1)))
    as_lists = mapped.to_series().to_list()

    assert as_lists == [[1, 2, 3], [2, 3, 4, 5]]


def test_apply_dataframe_return() -> None:
    """Rows mapped to tuples yield a frame with ``column_<i>`` names."""
    source = pl.DataFrame({"a": [1, 2, 3], "b": ["c", "d", None]})
    expected = pl.DataFrame(
        {
            "column_0": [10, 20, 30],
            "column_1": ["foo", "foo", "foo"],
            "column_2": [True, True, True],
            "column_3": ["c", "d", None],
        }
    )

    # Scale the first field, add two constants, and carry the last field over.
    out = source.apply(lambda r: (r[0] * 10, "foo", True, r[-1]))

    assert_frame_equal(out, expected)


def test_group_by_cat_list() -> None:
grouped = (
pl.DataFrame(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -361,27 +361,6 @@ def test_map_elements_set_datetime_output_8984() -> None:
)["a"].to_list() == [payload]


def test_err_df_apply_return_type() -> None:
    """A row function returning a list (not a tuple) raises ``ComputeError``."""
    frame = pl.DataFrame({"a": [[1, 2], [2, 3]], "b": [[4, 5], [6, 7]]})

    def pairwise_sum(row: tuple[Any, ...]) -> list[Any]:
        # Deliberately wraps the result in a list rather than a tuple.
        lhs, rhs = row[0], row[1]
        return [[a + b for a, b in zip(lhs, rhs)]]

    with pytest.raises(pl.ComputeError, match="expected tuple, got list"):
        frame.apply(pairwise_sum)


def test_apply_shifted_chunks() -> None:
    """An identity row mapping keeps a shifted column aligned with its source."""
    base = pl.DataFrame(pl.Series("texts", ["test", "test123", "tests"]))
    with_shift = base.select(
        pl.col("texts"),
        pl.col("texts").shift(1).alias("texts_shifted"),
    )

    as_dict = with_shift.apply(lambda row: row).to_dict(False)

    assert as_dict == {
        "column_0": ["test", "test123", "tests"],
        "column_1": [None, "test", "test123"],
    }


def test_map_elements_dict_order_10128() -> None:
df = pl.select(pl.lit("").map_elements(lambda x: {"c": 1, "b": 2, "a": 3}))
assert df.to_dict(False) == {"literal": [{"c": 1, "b": 2, "a": 3}]}
Expand Down
78 changes: 78 additions & 0 deletions py-polars/tests/unit/operations/map/test_map_rows.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
from __future__ import annotations

from typing import Any

import pytest

import polars as pl
from polars.testing import assert_frame_equal


def test_map_rows() -> None:
    """Map every row to its length and expect a single ``apply`` column."""
    frame = pl.DataFrame(
        {
            "a": ["foo", "2"],
            "b": [1, 2],
            "c": [1.0, 2.0],
        }
    )
    # Each row is a 3-tuple, so every mapped value is 3.
    expected = pl.DataFrame({"apply": [3, 3]})

    result = frame.map_rows(lambda row: len(row), None)

    assert_frame_equal(result, expected)


def test_map_rows_list_return() -> None:
    """Rows mapped to Series are gathered into one list-typed ``apply`` column."""
    bounds = pl.DataFrame({"start": [1, 2], "end": [3, 5]})
    expected = pl.DataFrame({"apply": [[1, 2, 3], [2, 3, 4, 5]]})

    # Expand each (start, end) row into the inclusive integer range.
    result = bounds.map_rows(lambda row: pl.Series(range(row[0], row[1] + 1)))

    assert_frame_equal(result, expected)


def test_map_rows_dataframe_return() -> None:
    """Rows mapped to tuples yield a frame with ``column_<i>`` names."""
    source = pl.DataFrame({"a": [1, 2, 3], "b": ["c", "d", None]})
    expected = pl.DataFrame(
        {
            "column_0": [10, 20, 30],
            "column_1": ["foo", "foo", "foo"],
            "column_2": [True, True, True],
            "column_3": ["c", "d", None],
        }
    )

    # Scale the first field, add two constants, and carry the last field over.
    result = source.map_rows(lambda r: (r[0] * 10, "foo", True, r[-1]))

    assert_frame_equal(result, expected)


def test_map_rows_error_return_type() -> None:
    """A row function returning a list (not a tuple) raises ``ComputeError``."""
    frame = pl.DataFrame({"a": [[1, 2], [2, 3]], "b": [[4, 5], [6, 7]]})

    def pairwise_sum(row: tuple[Any, ...]) -> list[Any]:
        # Deliberately wraps the result in a list rather than a tuple.
        lhs, rhs = row[0], row[1]
        return [[a + b for a, b in zip(lhs, rhs)]]

    with pytest.raises(pl.ComputeError, match="expected tuple, got list"):
        frame.map_rows(pairwise_sum)


def test_map_rows_shifted_chunks() -> None:
    """An identity row mapping keeps a shifted column aligned with its source."""
    base = pl.DataFrame(pl.Series("texts", ["test", "test123", "tests"]))
    with_shift = base.select(
        pl.col("texts"),
        pl.col("texts").shift(1).alias("texts_shifted"),
    )
    expected = pl.DataFrame(
        {
            "column_0": ["test", "test123", "tests"],
            "column_1": [None, "test", "test123"],
        }
    )

    result = with_shift.map_rows(lambda row: row)

    assert_frame_equal(result, expected)


def test_apply_deprecated() -> None:
    """The old ``apply`` name warns as deprecated but still maps the rows."""
    frame = pl.DataFrame(
        {
            "a": ["foo", "2"],
            "b": [1, 2],
            "c": [1.0, 2.0],
        }
    )
    expected = pl.DataFrame({"apply": [3, 3]})

    # Only the deprecated call sits inside the warning-capturing context.
    with pytest.deprecated_call():
        result = frame.apply(lambda row: len(row), None)

    assert_frame_equal(result, expected)

0 comments on commit 43ab40d

Please sign in to comment.