Run DataFrame.where tests with ArrayManager.

Adds the frame indexing `test_setitem_boolean` test and `test_where.py` to the
ArrayManager CI job, adds a `DataFrame._has_array_manager` helper, casts an
aligned `cond` back to bool in `_where` (pandas/core/generic.py) when its
columns were bool before alignment, makes `is_numeric_mixed_type` in
array_manager.py check the actual dtypes, and pins the ArrayManager `fillna`
dtype behaviour in a new test.

NOTE(review): line breaks and indentation were reconstructed from a
whitespace-collapsed copy of this patch; verify against the target tree
before applying.

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index b551e7ded0178..9cb5a3e17f527 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -157,3 +157,5 @@ jobs:
       run: |
         source activate pandas-dev
         pytest pandas/tests/frame/methods --array-manager
+        pytest pandas/tests/frame/indexing/test_indexing.py::TestDataFrameIndexing::test_setitem_boolean --array-manager
+        pytest pandas/tests/frame/indexing/test_where.py --array-manager
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 3621539a3f0c3..50579d8ca0787 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -621,6 +621,11 @@ def _as_manager(self, typ: str) -> DataFrame:
         # fastpath of passing a manager doesn't check the option/manager class
         return DataFrame(new_mgr)
 
+    @property
+    def _has_array_manager(self) -> bool:
+        """Whether this DataFrame is backed by an ArrayManager."""
+        return isinstance(self._mgr, ArrayManager)
+
     # ----------------------------------------------------------------------
 
     @property
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index e1271cfec2bde..1a163035d8f01 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -8801,6 +8801,13 @@ def _where(
         if axis is not None:
             axis = self._get_axis_number(axis)
 
+        cond_orig = cond
+
+        # Needed for DataFrames with ArrayManager, see below for details
+        all_bool_columns = False
+        if isinstance(cond, ABCDataFrame) and cond._has_array_manager:
+            all_bool_columns = all(is_bool_dtype(dt) for dt in cond.dtypes)
+
         # align the cond to same shape as myself
         cond = com.apply_if_callable(cond, self)
         if isinstance(cond, NDFrame):
@@ -8812,9 +8819,32 @@ def _where(
                 raise ValueError("Array conditional must be same shape as self")
             cond = self._constructor(cond, **self._construct_axes_dict())
 
+        # Needed for DataFrames with ArrayManager, see below for details
+        if (
+            isinstance(cond, ABCDataFrame)
+            and cond._has_array_manager
+            and isinstance(cond_orig, ABCSeries)
+        ):
+            all_bool_columns = is_bool_dtype(cond_orig.dtype)
+
         # make sure we are boolean
         fill_value = bool(inplace)
-        cond = cond.fillna(fill_value)
+        try:
+            cond = cond.fillna(fill_value)
+        except TypeError as err:
+            # With ArrayManager, fillna can raise an error if `cond` is not
+            # of boolean dtype
+            raise ValueError("Boolean array expected for the condition") from err
+
+        # With ArrayManager, `fillna` does not automatically change object dtype
+        # back to bools (if the alignment made it object by introducing NaNs).
+        # So in this case we cast back to bool manually *if* the original columns
+        # before aligning were bool
+        # TODO this workaround can be removed once we have nullable boolean dtype
+        # as default
+        if isinstance(cond, ABCDataFrame) and cond._has_array_manager:
+            if not all(is_bool_dtype(dt) for dt in cond.dtypes) and all_bool_columns:
+                cond = cond.astype(bool)
 
         msg = "Boolean array expected for the condition, not {dtype}"
 
diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py
index 0f677ff3180be..40ce563f86162 100644
--- a/pandas/core/internals/array_manager.py
+++ b/pandas/core/internals/array_manager.py
@@ -454,7 +454,7 @@ def is_mixed_type(self) -> bool:
 
     @property
     def is_numeric_mixed_type(self) -> bool:
-        return False
+        return all(is_numeric_dtype(t) for t in self.get_dtypes())
 
     @property
     def any_extension_types(self) -> bool:
diff --git a/pandas/tests/frame/indexing/test_where.py b/pandas/tests/frame/indexing/test_where.py
index 2f098426efaf9..e4b161f9240ff 100644
--- a/pandas/tests/frame/indexing/test_where.py
+++ b/pandas/tests/frame/indexing/test_where.py
@@ -159,7 +159,7 @@ def test_where_set(self, where_frame, float_string_frame):
 
         def _check_set(df, cond, check_dtypes=True):
             dfi = df.copy()
-            econd = cond.reindex_like(df).fillna(True)
+            econd = cond.reindex_like(df).fillna(True).astype(bool)
             expected = dfi.mask(~econd)
 
             return_value = dfi.where(cond, np.nan, inplace=True)
@@ -499,6 +499,7 @@ def test_where_axis(self):
         assert return_value is None
         tm.assert_frame_equal(result, expected)
 
+    def test_where_axis_multiple_dtypes(self):
         # Multiple dtypes (=> multiple Blocks)
         df = pd.concat(
             [
diff --git a/pandas/tests/frame/methods/test_fillna.py b/pandas/tests/frame/methods/test_fillna.py
index 58016be82c405..035f31a29c599 100644
--- a/pandas/tests/frame/methods/test_fillna.py
+++ b/pandas/tests/frame/methods/test_fillna.py
@@ -544,3 +544,14 @@ def test_fillna_nonconsolidated_frame():
     df_nonconsol = df.pivot("i1", "i2")
     result = df_nonconsol.fillna(0)
     assert result.isna().sum().sum() == 0
+
+
+def test_fillna_infer_bool_dtype(using_array_manager):
+    # With ArrayManager, fillna doesn't change/infer dtypes
+    df = DataFrame([[True, False], [np.nan, True], [False, None]], dtype=object)
+    result = df.fillna(True)
+    if using_array_manager:
+        expected = DataFrame([[True, False], [True, True], [False, True]], dtype=object)
+    else:
+        expected = DataFrame([[True, False], [True, True], [False, True]], dtype=bool)
+    tm.assert_frame_equal(result, expected)