Skip to content

Commit

Permalink
ERR: Boolean comparisons of a Series vs None will now be equivalent…
Browse files Browse the repository at this point in the history
…to null comparisons, rather than raising TypeError, xref pandas-dev#1079
  • Loading branch information
jreback committed Jul 17, 2015
1 parent 5b97367 commit 4fe7c68
Show file tree
Hide file tree
Showing 7 changed files with 169 additions and 103 deletions.
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v0.17.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -231,7 +231,7 @@ Other API Changes
- Enable serialization of lists and dicts to strings in ExcelWriter (:issue:`8188`)
- Allow passing `kwargs` to the interpolation methods (:issue:`10378`).
- Serialize metadata properties of subclasses of pandas objects (:issue:`10553`).

- Boolean comparisons of a ``Series`` vs ``None`` will now be equivalent to comparing with ``np.nan``, rather than raising ``TypeError``, xref (:issue:`1079`).

.. _whatsnew_0170.deprecations:

Expand Down
71 changes: 48 additions & 23 deletions pandas/core/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,11 @@
from pandas.util.decorators import Appender
import pandas.core.common as com
import pandas.computation.expressions as expressions
from pandas.lib import isscalar
from pandas.tslib import iNaT
from pandas.core.common import(bind_method, is_list_like, notnull, isnull,
_values_from_object, _maybe_match_name)
_values_from_object, _maybe_match_name,
needs_i8_conversion, is_integer_dtype)

# -----------------------------------------------------------------------------
# Functions that add arithmetic methods to objects, given arithmetic factory
Expand Down Expand Up @@ -257,7 +260,7 @@ class _TimeOp(object):
Generally, you should use classmethod ``maybe_convert_for_time_op`` as an
entry point.
"""
fill_value = tslib.iNaT
fill_value = iNaT
wrap_results = staticmethod(lambda x: x)
dtype = None

Expand Down Expand Up @@ -346,7 +349,7 @@ def _convert_to_array(self, values, name=None, other=None):
if (other is not None and other.dtype == 'timedelta64[ns]' and
all(isnull(v) for v in values)):
values = np.empty(values.shape, dtype=other.dtype)
values[:] = tslib.iNaT
values[:] = iNaT

# a datelike
elif isinstance(values, pd.DatetimeIndex):
Expand Down Expand Up @@ -381,7 +384,7 @@ def _convert_to_array(self, values, name=None, other=None):
# all nan, so ok, use the other dtype (e.g. timedelta or datetime)
if isnull(values).all():
values = np.empty(values.shape, dtype=other.dtype)
values[:] = tslib.iNaT
values[:] = iNaT
else:
raise TypeError(
'incompatible type [{0}] for a datetime/timedelta '
Expand Down Expand Up @@ -549,26 +552,59 @@ def na_op(x, y):
elif com.is_categorical_dtype(y) and not lib.isscalar(y):
return op(y,x)

if x.dtype == np.object_:
if com.is_object_dtype(x.dtype):
if isinstance(y, list):
y = lib.list_to_object_array(y)

if isinstance(y, (np.ndarray, pd.Series)):
if y.dtype != np.object_:
if not com.is_object_dtype(y.dtype):
result = lib.vec_compare(x, y.astype(np.object_), op)
else:
result = lib.vec_compare(x, y, op)
else:
result = lib.scalar_compare(x, y, op)
else:

# numpy does not like comparisons vs None
if lib.isscalar(y) and isnull(y):
y = np.nan

# we want to compare like types:
# a datetime/timedelta array is viewed as i8 below before comparing,
# so reject datetime-like vs integer-array comparisons up front;
# otherwise datetime64 (viewed as i8) would silently be allowed
# to compare against plain integers
if needs_i8_conversion(x) and (not isscalar(y) and is_integer_dtype(y)):
raise TypeError("invalid type comparison")
elif (not isscalar(y) and needs_i8_conversion(y)) and is_integer_dtype(x):
raise TypeError("invalid type comparison")

# we have a datetime/timedelta and may need to convert
mask = None
if needs_i8_conversion(x) or (not isscalar(y) and needs_i8_conversion(y)):

if isscalar(y):
y = _index.convert_scalar(x,_values_from_object(y))
else:
y = y.view('i8')

if name == '__ne__':
mask = notnull(x)
else:
mask = isnull(x)

x = x.view('i8')

try:
result = getattr(x, name)(y)
if result is NotImplemented:
raise TypeError("invalid type comparison")
except (AttributeError):
except AttributeError:
result = op(x, y)

if mask is not None and mask.any():
result[mask] = False

return result

def wrapper(self, other, axis=None):
Expand Down Expand Up @@ -596,23 +632,18 @@ def wrapper(self, other, axis=None):
raise TypeError(msg.format(op=op,typ=self.dtype))


mask = isnull(self)

if com.is_categorical_dtype(self):
# cats are a special case as get_values() would return an ndarray, which would then
# not take categories ordering into account
# we can go directly to op, as the na_op would just test again and dispatch to it.
res = op(self.values, other)
else:
values = self.get_values()
other = _index.convert_scalar(values,_values_from_object(other))

if issubclass(values.dtype.type, (np.datetime64, np.timedelta64)):
values = values.view('i8')
if is_list_like(other):
other = np.asarray(other)

# scalars
res = na_op(values, other)
if np.isscalar(res):
if lib.isscalar(res):
raise TypeError('Could not compare %s type with Series'
% type(other))

Expand All @@ -621,11 +652,6 @@ def wrapper(self, other, axis=None):

res = pd.Series(res, index=self.index, name=self.name,
dtype='bool')

# mask out the invalids
if mask.any():
res[mask] = masker

return res
return wrapper

Expand All @@ -643,8 +669,7 @@ def na_op(x, y):
y = lib.list_to_object_array(y)

if isinstance(y, (np.ndarray, pd.Series)):
if (x.dtype == np.bool_ and
y.dtype == np.bool_): # pragma: no cover
if (com.is_bool_dtype(x.dtype) and com.is_bool_dtype(y.dtype)):
result = op(x, y) # when would this be hit?
else:
x = com._ensure_object(x)
Expand Down Expand Up @@ -1046,7 +1071,7 @@ def na_op(x, y):

# work only for scalars
def f(self, other):
if not np.isscalar(other):
if not lib.isscalar(other):
raise ValueError('Simple arithmetic with %s can only be '
'done with scalar values' %
self._constructor.__name__)
Expand Down
10 changes: 9 additions & 1 deletion pandas/lib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -378,7 +378,7 @@ def isnullobj2d_old(ndarray[object, ndim=2] arr):
@cython.boundscheck(False)
cpdef ndarray[object] list_to_object_array(list obj):
'''
Convert list to object ndarray. Seriously can't believe I had to write this
Convert list to object ndarray. Seriously can\'t believe I had to write this
function
'''
cdef:
Expand Down Expand Up @@ -682,6 +682,7 @@ def scalar_compare(ndarray[object] values, object val, object op):
cdef:
Py_ssize_t i, n = len(values)
ndarray[uint8_t, cast=True] result
bint isnull_val
int flag
object x

Expand All @@ -701,12 +702,15 @@ def scalar_compare(ndarray[object] values, object val, object op):
raise ValueError('Unrecognized operator')

result = np.empty(n, dtype=bool).view(np.uint8)
isnull_val = _checknull(val)

if flag == cpython.Py_NE:
for i in range(n):
x = values[i]
if _checknull(x):
result[i] = True
elif isnull_val:
result[i] = True
else:
try:
result[i] = cpython.PyObject_RichCompareBool(x, val, flag)
Expand All @@ -717,6 +721,8 @@ def scalar_compare(ndarray[object] values, object val, object op):
x = values[i]
if _checknull(x):
result[i] = False
elif isnull_val:
result[i] = False
else:
try:
result[i] = cpython.PyObject_RichCompareBool(x, val, flag)
Expand All @@ -728,6 +734,8 @@ def scalar_compare(ndarray[object] values, object val, object op):
x = values[i]
if _checknull(x):
result[i] = False
elif isnull_val:
result[i] = False
else:
result[i] = cpython.PyObject_RichCompareBool(x, val, flag)

Expand Down
42 changes: 41 additions & 1 deletion pandas/tests/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from pandas.tseries.common import is_datetimelike
from pandas import Series, Index, Int64Index, DatetimeIndex, TimedeltaIndex, PeriodIndex, Timedelta
import pandas.tslib as tslib
from pandas import _np_version_under1p9
import nose

import pandas.util.testing as tm
Expand Down Expand Up @@ -273,6 +274,45 @@ def setUp(self):
self.is_valid_objs = [ o for o in self.objs if o._allow_index_ops ]
self.not_valid_objs = [ o for o in self.objs if not o._allow_index_ops ]

def test_none_comparison(self):

# Comparisons of a Series against None; bug brought up by #1079.
# Behaviour changed in 0.17.0: these used to raise TypeError.
for o in self.is_valid_objs:
if isinstance(o, Series):

# put a null in the first position before comparing
o[0] = np.nan

# Series == None: expected False at positions 0 and 1
result = o == None
self.assertFalse(result.iat[0])
self.assertFalse(result.iat[1])

# Series != None: expected True at positions 0 and 1
result = o != None
self.assertTrue(result.iat[0])
self.assertTrue(result.iat[1])

# reflected form, None on the left-hand side
result = None == o
self.assertFalse(result.iat[0])
self.assertFalse(result.iat[1])

if _np_version_under1p9:
# skipped when `_np_version_under1p9` is set: `None != o` fails
# there because it is evaluated as "not __eq__", which is not
# valid for numpy
pass
else:
result = None != o
self.assertTrue(result.iat[0])
self.assertTrue(result.iat[1])

# ordering comparisons against None: expected False at positions 0 and 1
result = None > o
self.assertFalse(result.iat[0])
self.assertFalse(result.iat[1])

result = o < None
self.assertFalse(result.iat[0])
self.assertFalse(result.iat[1])


def test_ndarray_compat_properties(self):

for o in self.objs:
Expand Down Expand Up @@ -513,7 +553,7 @@ def test_value_counts_inferred(self):
expected = Series([4, 3, 2], index=['b', 'a', 'd'])
tm.assert_series_equal(s.value_counts(), expected)

self.assert_numpy_array_equal(s.unique(), np.array(['a', 'b', np.nan, 'd'], dtype='O'))
self.assert_numpy_array_equivalent(s.unique(), np.array(['a', 'b', np.nan, 'd'], dtype='O'))
self.assertEqual(s.nunique(), 3)

s = klass({})
Expand Down
Loading

0 comments on commit 4fe7c68

Please sign in to comment.