Changes from all commits (23 commits)
f169b68
Initial test case
eicchen Jul 10, 2025
fe7e8c8
Updated test case to account for results of mul being NaN if both inp…
eicchen Jul 10, 2025
5421761
Removed test cases which expect an error from fill_value
eicchen Jul 10, 2025
32a0f77
Updated test case to include other operators which included fill_value
eicchen Jul 10, 2025
42a8b76
Removed restriction on using fill_value with series
eicchen Jul 10, 2025
654c2f3
Included PR suggestions, added seperate dtype test (WIP)
eicchen Jul 15, 2025
a360daf
temp files
eicchen Jul 16, 2025
e72c128
Added test case to test EA and NUMPY dtypes
eicchen Aug 18, 2025
51b0898
addressed changes brought up in PR, converted test cases to not use n…
eicchen Aug 21, 2025
a230b57
Limit np conversion to IntegerArray and FloatArray
eicchen Aug 21, 2025
3207057
Updated EA catch method in _maybe_align_series_as_frame
eicchen Aug 21, 2025
eb753a2
Addressed errors from changes in som tests
eicchen Aug 21, 2025
550ddde
removed comment and errant print statement
eicchen Aug 21, 2025
23afb07
Commented out test_add_frame's xfail to test CI
eicchen Aug 23, 2025
81f4f18
Allows frames to be added to strings, with modifications to tests tha…
eicchen Aug 25, 2025
4a9f4db
Moved type conversion within add and radd if statement, removed datea…
eicchen Aug 26, 2025
76df452
Removed PeriodArray special casing and modified test case
eicchen Aug 27, 2025
17664e2
ENH: fill_value in frame+series flex ops
jbrockmendel Sep 11, 2025
23767fe
Fixed issue by adding a conversion clause to _cmp_method in string_.py
eicchen Sep 24, 2025
5e69871
Added testcases for StringArray addition and fixes
eicchen Sep 29, 2025
5380aba
Fixed regex modified during pre-commit
eicchen Sep 29, 2025
01e4959
Added FUTURE_INFER_STRING catch, edited documentation
eicchen Sep 29, 2025
cfb4e25
Edited documentation due to docstring error
eicchen Sep 29, 2025
3 changes: 2 additions & 1 deletion doc/source/whatsnew/v3.0.0.rst
@@ -200,6 +200,7 @@ Other enhancements
- :class:`Holiday` has gained the constructor argument and field ``exclude_dates`` to exclude specific datetimes from a custom holiday calendar (:issue:`54382`)
- :class:`Rolling` and :class:`Expanding` now support ``nunique`` (:issue:`26958`)
- :class:`Rolling` and :class:`Expanding` now support aggregations ``first`` and ``last`` (:issue:`33155`)
- :class:`StringDtype` now supports addition to Series/DataFrame with floats, ints, and strings (:issue:`61581`)
- :func:`read_parquet` accepts ``to_pandas_kwargs`` which are forwarded to :meth:`pyarrow.Table.to_pandas` which enables passing additional keywords to customize the conversion to pandas, such as ``maps_as_pydicts`` to read the Parquet map data type as python dictionaries (:issue:`56842`)
- :meth:`.DataFrameGroupBy.transform`, :meth:`.SeriesGroupBy.transform`, :meth:`.DataFrameGroupBy.agg`, :meth:`.SeriesGroupBy.agg`, :meth:`.SeriesGroupBy.apply`, :meth:`.DataFrameGroupBy.apply` now support ``kurt`` (:issue:`40139`)
- :meth:`DataFrame.apply` supports using third-party execution engines like the Bodo.ai JIT compiler (:issue:`60668`)
@@ -227,7 +228,6 @@ Other enhancements
- Support reading Stata 102-format (Stata 1) dta files (:issue:`58978`)
- Support reading Stata 110-format (Stata 7) dta files (:issue:`47176`)
- Switched wheel upload to **PyPI Trusted Publishing** (OIDC) for release-tag pushes in ``wheels.yml``. (:issue:`61718`)
-

.. ---------------------------------------------------------------------------
.. _whatsnew_300.notable_bug_fixes:
@@ -998,6 +998,7 @@ MultiIndex
- :func:`MultiIndex.get_level_values` accessing a :class:`DatetimeIndex` does not carry the frequency attribute along (:issue:`58327`, :issue:`57949`)
- Bug in :class:`DataFrame` arithmetic operations in case of unaligned MultiIndex columns (:issue:`60498`)
- Bug in :class:`DataFrame` arithmetic operations with :class:`Series` in case of unaligned MultiIndex (:issue:`61009`)
- Bug in :class:`DataFrame` arithmetic operations with :class:`Series` raising ``NotImplementedError`` when the ``fill_value`` parameter was passed (:issue:`61581`)
- Bug in :meth:`MultiIndex.from_tuples` causing wrong output with input of type tuples having NaN values (:issue:`60695`, :issue:`60988`)
- Bug in :meth:`DataFrame.__setitem__` where column alignment logic would reindex the assigned value with an empty index, incorrectly setting all values to ``NaN``.(:issue:`61841`)
- Bug in :meth:`DataFrame.reindex` and :meth:`Series.reindex` where reindexing :class:`Index` to a :class:`MultiIndex` would incorrectly set all values to ``NaN``.(:issue:`60923`)
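To make the two whatsnew entries above concrete, here is a minimal sketch of the intended user-facing behavior; the values are made up, and the exact outputs are assumptions based on the tests further down in this diff.

```python
import pandas as pd

# StringDtype + numeric addition (GH#61581): numbers are stringified and
# concatenated; whole-number floats are expected to drop the trailing ".0".
s_str = pd.Series(["a", "b", "c"], dtype="string")
s_num = pd.Series([1, 2.5, 3.0])
s_str + s_num  # expected: ["a1", "b2.5", "c3"] with string dtype

# DataFrame + Series flex ops with fill_value no longer raise
# NotImplementedError; labels missing from the series are filled before
# the op (here "B" is treated as 0 rather than NaN).
df = pd.DataFrame({"A": [1.0, 2.0], "B": [3.0, 4.0]})
ser = pd.Series({"A": 10.0})
df.add(ser, fill_value=0)
```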
51 changes: 40 additions & 11 deletions pandas/core/arrays/arrow/array.py
@@ -890,25 +890,54 @@ def _op_method_error_message(self, other, op) -> str:
def _evaluate_op_method(self, other, op, arrow_funcs) -> Self:
pa_type = self._pa_array.type
other_original = other
other = self._box_pa(other)
try:
other = self._box_pa(other)
except pa.lib.ArrowTypeError:
# was expecting time dtype but received non-temporal dtype (time offset)
from pandas.core.tools.timedeltas import to_timedelta

other = self._box_pa(to_timedelta(other))
except ValueError as err:
raise TypeError(
"Incompatible type when converting to PyArrow dtype for operation."
) from err

if (
pa.types.is_string(pa_type)
or pa.types.is_large_string(pa_type)
or pa.types.is_binary(pa_type)
):
if op in [operator.add, roperator.radd]:
sep = pa.scalar("", type=pa_type)
try:
if op is operator.add:
result = pc.binary_join_element_wise(self._pa_array, other, sep)
elif op is roperator.radd:
result = pc.binary_join_element_wise(other, self._pa_array, sep)
except pa.ArrowNotImplementedError as err:
# pyarrow gets upset if you try to join a NullArray
if (
pa.types.is_integer(other.type)
or pa.types.is_floating(other.type)
or pa.types.is_null(other.type)
or pa.types.is_string(other.type)
or pa.types.is_large_string(other.type)
or pa.types.is_binary(other.type)
):
other = other.cast(pa_type)
sep = pa.scalar("", type=pa_type)
try:
if op is operator.add:
result = pc.binary_join_element_wise(
self._pa_array, other, sep
)
elif op is roperator.radd:
result = pc.binary_join_element_wise(
other, self._pa_array, sep
)
except pa.ArrowNotImplementedError as err:
raise TypeError(
self._op_method_error_message(other_original, op)
) from err
return self._from_pyarrow_array(result)
else:
raise TypeError(
self._op_method_error_message(other_original, op)
) from err
return self._from_pyarrow_array(result)
"Can only add string arrays to dtypes "
"null, int, float, str, and binary."
)
elif op in [operator.mul, roperator.rmul]:
binary = self._pa_array
integral = other
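For reference, the cast-then-join sequence that the new string branch builds can be reproduced with plain PyArrow. A standalone sketch with made-up values, mirroring the calls used in the code above:

```python
import pyarrow as pa
import pyarrow.compute as pc

strings = pa.array(["a", "b", None], type=pa.string())
numbers = pa.array([1, 2, 3], type=pa.int64())

# Cast the non-string operand to the string type of the left operand, then
# concatenate element-wise with an empty separator, as the add branch does.
sep = pa.scalar("", type=pa.string())
result = pc.binary_join_element_wise(strings, numbers.cast(pa.string()), sep)
# -> ["a1", "b2", null]  (nulls propagate with the default null handling)
```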
19 changes: 19 additions & 0 deletions pandas/core/arrays/string_.py
@@ -45,6 +45,7 @@
from pandas.core.dtypes.common import (
is_array_like,
is_bool_dtype,
is_float_dtype,
is_integer_dtype,
is_object_dtype,
is_string_dtype,
@@ -1110,10 +1111,28 @@ def _cmp_method(self, other, op):
if op.__name__ in ops.ARITHMETIC_BINOPS:
result = np.empty_like(self._ndarray, dtype="object")
result[mask] = self.dtype.na_value
if op.__name__ in ["add", "radd"]:
if isinstance(other, str) or is_string_dtype(other):
pass
elif is_float_dtype(other) or is_integer_dtype(other):
if is_float_dtype(other):
# Shorten whole number floats to match pyarrow behavior
other = [
str(int(x)) if x.is_integer() else str(x) for x in other
]
else:
other = other.astype(str)
else:
raise TypeError(
f"Only supports op({op.__name__}) between StringArray and "
"dtypes int, float, and str."
)

result[valid] = op(self._ndarray[valid], other)
if isinstance(other, Path):
# GH#61940
return result

return self._from_backing_data(result)
else:
# logical
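The float handling in the new branch keeps the NumPy-backed ``StringArray`` consistent with the PyArrow-backed path, which casts whole-number floats to strings without a trailing ``.0``. A small sketch of that conversion (values made up; the final result shown is an assumption based on the tests):

```python
import numpy as np
import pandas as pd

arr = pd.array(["x", "y", "z"], dtype=pd.StringDtype("python"))
floats = np.array([1.5, 2.0, 3.25])

# Whole-number floats are shortened before concatenation so 2.0 joins as
# "2", matching how PyArrow casts floats to strings.
[str(int(v)) if v.is_integer() else str(v) for v in floats]
# -> ["1.5", "2", "3.25"]

arr + floats  # expected: ["x1.5", "y2", "z3.25"]
```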
48 changes: 25 additions & 23 deletions pandas/core/frame.py
@@ -8468,27 +8468,34 @@ def _maybe_align_series_as_frame(self, series: Series, axis: AxisInt):
blockwise.
"""
rvalues = series._values
if not isinstance(rvalues, np.ndarray):
# TODO(EA2D): no need to special-case with 2D EAs
if rvalues.dtype in ("datetime64[ns]", "timedelta64[ns]"):
# We can losslessly+cheaply cast to ndarray
rvalues = np.asarray(rvalues)
if lib.is_np_dtype(rvalues.dtype):
# We can losslessly+cheaply cast to ndarray
# i.e. ndarray or dt64[naive], td64
# TODO(EA2D): no need to special case with 2D EAs
rvalues = np.asarray(rvalues)

if axis == 0:
rvalues = rvalues.reshape(-1, 1)
else:
return series
rvalues = rvalues.reshape(1, -1)

if axis == 0:
rvalues = rvalues.reshape(-1, 1)
else:
rvalues = rvalues.reshape(1, -1)
rvalues = np.broadcast_to(rvalues, self.shape)
# pass dtype to avoid doing inference
df = self._constructor(rvalues, dtype=rvalues.dtype)

rvalues = np.broadcast_to(rvalues, self.shape)
# pass dtype to avoid doing inference
return self._constructor(
rvalues,
index=self.index,
columns=self.columns,
dtype=rvalues.dtype,
).__finalize__(series)
else:
# GH#61581
if axis == 0:
df = DataFrame(dict.fromkeys(range(self.shape[1]), rvalues))
else:
nrows = self.shape[0]
df = DataFrame(
{i: rvalues[[i]].repeat(nrows) for i in range(self.shape[1])},
dtype=rvalues.dtype,
)
df.index = self.index
df.columns = self.columns
return df.__finalize__(series)

def _flex_arith_method(
self, other, op, *, axis: Axis = "columns", level=None, fill_value=None
@@ -8498,11 +8505,6 @@ def _flex_arith_method(
if self._should_reindex_frame_op(other, op, axis, fill_value, level):
return self._arith_method_with_reindex(other, op)

if isinstance(other, Series) and fill_value is not None:
# TODO: We could allow this in cases where we end up going
# through the DataFrame path
raise NotImplementedError(f"fill_value {fill_value} not supported.")

other = ops.maybe_prepare_scalar_for_op(other, self.shape)
self, other = self._align_for_op(other, axis, flex=True, level=level)

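A rough standalone equivalent of the new extension-array branch for ``axis=0`` (the ``df``/``ser`` names are only for illustration): the series values are repeated into every column of a frame shaped like ``self``, preserving the extension dtype, which is what lets flex ops such as ``df.add(ser, axis=0, fill_value=0)`` reach the blockwise path instead of raising ``NotImplementedError``.

```python
import pandas as pd

df = pd.DataFrame({"A": [1, 2], "B": [3, 4]})
ser = pd.Series([10, 20], dtype="Int64")  # extension dtype: no cheap 2D ndarray

# axis=0: broadcast the series down each column, keeping its masked dtype.
aligned = pd.DataFrame(dict.fromkeys(range(df.shape[1]), ser.array))
aligned.index = df.index
aligned.columns = df.columns
```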
17 changes: 3 additions & 14 deletions pandas/tests/arithmetic/test_period.py
@@ -1361,12 +1361,7 @@ def test_period_add_timestamp_raises(self, box_with_array):
arr + ts
with pytest.raises(TypeError, match=msg):
ts + arr
if box_with_array is pd.DataFrame:
# TODO: before implementing resolution-inference we got the same
# message with DataFrame and non-DataFrame. Why did that change?
msg = "cannot add PeriodArray and Timestamp"
else:
msg = "cannot add PeriodArray and DatetimeArray"
msg = "cannot add PeriodArray and DatetimeArray"
with pytest.raises(TypeError, match=msg):
arr + Series([ts])
with pytest.raises(TypeError, match=msg):
@@ -1376,16 +1371,10 @@ def test_period_add_timestamp_raises(self, box_with_array):
with pytest.raises(TypeError, match=msg):
pd.Index([ts]) + arr

if box_with_array is pd.DataFrame:
msg = "cannot add PeriodArray and DatetimeArray"
else:
msg = r"unsupported operand type\(s\) for \+: 'Period' and 'DatetimeArray"
msg = "cannot add PeriodArray and DatetimeArray"

with pytest.raises(TypeError, match=msg):
arr + pd.DataFrame([ts])
if box_with_array is pd.DataFrame:
msg = "cannot add PeriodArray and DatetimeArray"
else:
msg = r"unsupported operand type\(s\) for \+: 'DatetimeArray' and 'Period'"
with pytest.raises(TypeError, match=msg):
pd.DataFrame([ts]) + arr

2 changes: 1 addition & 1 deletion pandas/tests/arrays/boolean/test_arithmetic.py
@@ -118,7 +118,7 @@ def test_error_invalid_values(data, all_arithmetic_operators):
ops(pd.Timestamp("20180101"))

# invalid array-likes
if op not in ("__mul__", "__rmul__"):
if op not in ("__mul__", "__rmul__", "__add__", "__radd__"):
# TODO(extension) numpy's mul with object array sees booleans as numbers
msg = "|".join(
[
39 changes: 37 additions & 2 deletions pandas/tests/arrays/floating/test_arithmetic.py
@@ -144,6 +144,7 @@ def test_error_invalid_values(data, all_arithmetic_operators):
"not implemented",
"not supported for dtype",
"Can only string multiply by an integer",
"can't multiply sequence by non-int of type 'str'",
]
)
with pytest.raises(TypeError, match=msg):
@@ -152,8 +153,42 @@
ops(pd.Timestamp("20180101"))

# invalid array-likes
with pytest.raises(TypeError, match=msg):
ops(pd.Series("foo", index=s.index))
str_ser = pd.Series("foo", index=s.index)
if (
all_arithmetic_operators
in [
"__add__",
"__radd__",
]
and pd.options.future.infer_string
):
res = ops(str_ser)
if all_arithmetic_operators == "__radd__":
data_expected = []
for i in data:
if pd.isna(i):
data_expected.append(i)
elif i.is_integer():
data_expected.append("foo" + str(int(i)))
else:
data_expected.append("foo" + str(i))

expected = pd.Series(data_expected, index=s.index)
else:
data_expected = []
for i in data:
if pd.isna(i):
data_expected.append(i)
elif i.is_integer():
data_expected.append(str(int(i)) + "foo")
else:
data_expected.append(str(i) + "foo")

expected = pd.Series(data_expected, index=s.index)
tm.assert_series_equal(res, expected)
else:
with pytest.raises(TypeError, match=msg):
ops(str_ser)

msg = "|".join(
[
20 changes: 20 additions & 0 deletions pandas/tests/arrays/integer/test_arithmetic.py
@@ -197,6 +197,26 @@ def test_error_invalid_values(data, all_arithmetic_operators):
# assert_almost_equal stricter, but the expected with pd.NA seems
# more-correct than np.nan here.
tm.assert_series_equal(res, expected)
elif (
all_arithmetic_operators
in [
"__add__",
"__radd__",
]
and pd.options.future.infer_string
):
res = ops(str_ser)
if all_arithmetic_operators == "__radd__":
expected = pd.Series(
[np.nan if pd.isna(x) == 1 else "foo" + str(x) for x in data],
index=s.index,
)
else:
expected = pd.Series(
[np.nan if pd.isna(x) == 1 else str(x) + "foo" for x in data],
index=s.index,
)
tm.assert_series_equal(res, expected)
else:
with tm.external_error_raised(TypeError):
ops(str_ser)