Skip to content

Commit

Permalink
ENH: support monotonic decreasing indexes in Index.slice_locs
Browse files Browse the repository at this point in the history
also: NaN and NaT imply not monotonic
  • Loading branch information
shoyer committed Nov 2, 2014
1 parent a5915f7 commit db6f8fd
Show file tree
Hide file tree
Showing 11 changed files with 231 additions and 62 deletions.
2 changes: 2 additions & 0 deletions doc/source/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1166,6 +1166,8 @@ Attributes

Index.values
Index.is_monotonic
Index.is_monotonic_increasing
Index.is_monotonic_decreasing
Index.is_unique
Index.dtype
Index.inferred_type
Expand Down
28 changes: 26 additions & 2 deletions doc/source/whatsnew/v0.15.1.txt
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,29 @@ API changes

s.dt.hour

- support for slicing with monotonic decreasing indexes, even if ``start`` or ``stop`` is
not found in the index (:issue:`7860`):

.. ipython:: python

s = pd.Series(['a', 'b', 'c', 'd'], [4, 3, 2, 1])
s

previous behavior:

.. code-block:: python

In [8]: s.loc[3.5:1.5]
KeyError: 3.5

current behavior:

.. ipython:: python

s.loc[3.5:1.5]

- Added ``Index`` properties ``is_monotonic_increasing`` and ``is_monotonic_decreasing`` (:issue:`8680`).

.. _whatsnew_0151.enhancements:

Enhancements
Expand Down Expand Up @@ -208,8 +231,9 @@ Bug Fixes
- Bug in ix/loc block splitting on setitem (manifests with integer-like dtypes, e.g. datetime64) (:issue:`8607`)




- Bug when doing label-based indexing with integers not found in the index for
non-unique but monotonic indexes (:issue:`8680`).
- Bug when indexing a Float64Index with ``np.nan`` on numpy 1.7 (:issue:`8980`).



Expand Down
2 changes: 1 addition & 1 deletion pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -1461,7 +1461,7 @@ def xs(self, key, axis=0, level=None, copy=None, drop_level=True):
name=self.index[loc])

else:
result = self[loc]
result = self.iloc[loc]
result.index = new_index

# this could be a view
Expand Down
25 changes: 12 additions & 13 deletions pandas/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -2002,16 +2002,12 @@ def _get_slice(starting_value, offset, search_side, slice_property,
slc += offset

except KeyError:
if self.is_monotonic:

# we are duplicated but non-unique
# so if we have an indexer then we are done
# else search for it (GH 7523)
if not is_unique and is_integer(search_value):
slc = search_value
else:
slc = self.searchsorted(search_value,
side=search_side)
if self.is_monotonic_increasing:
slc = self.searchsorted(search_value, side=search_side)
elif self.is_monotonic_decreasing:
search_side = 'right' if search_side == 'left' else 'left'
slc = len(self) - self[::-1].searchsorted(search_value,
side=search_side)
else:
raise
return slc
Expand Down Expand Up @@ -2445,10 +2441,13 @@ def __contains__(self, other):
def get_loc(self, key):
try:
if np.all(np.isnan(key)):
nan_idxs = self._nan_idxs
try:
return self._nan_idxs.item()
except ValueError:
return self._nan_idxs
return nan_idxs.item()
except (ValueError, IndexError):
# should only need to catch ValueError here but on numpy
# 1.7 .item() can raise IndexError when NaNs are present
return nan_idxs
except (TypeError, NotImplementedError):
pass
return super(Float64Index, self).get_loc(key)
Expand Down
8 changes: 4 additions & 4 deletions pandas/index.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -356,7 +356,7 @@ cdef class Int64Engine(IndexEngine):
return _hash.Int64HashTable(n)

def _call_monotonic(self, values):
return algos.is_monotonic_int64(values)
return algos.is_monotonic_int64(values, timelike=False)

def get_pad_indexer(self, other, limit=None):
return algos.pad_int64(self._get_index_values(), other,
Expand Down Expand Up @@ -446,7 +446,7 @@ cdef class Float64Engine(IndexEngine):
return result

def _call_monotonic(self, values):
return algos.is_monotonic_float64(values)
return algos.is_monotonic_float64(values, timelike=False)

def get_pad_indexer(self, other, limit=None):
return algos.pad_float64(self._get_index_values(), other,
Expand Down Expand Up @@ -500,7 +500,7 @@ cdef class ObjectEngine(IndexEngine):
return _hash.PyObjectHashTable(n)

def _call_monotonic(self, values):
return algos.is_monotonic_object(values)
return algos.is_monotonic_object(values, timelike=False)

def get_pad_indexer(self, other, limit=None):
return algos.pad_object(self._get_index_values(), other,
Expand Down Expand Up @@ -532,7 +532,7 @@ cdef class DatetimeEngine(Int64Engine):
return self.vgetter().view('i8')

def _call_monotonic(self, values):
return algos.is_monotonic_int64(values)
return algos.is_monotonic_int64(values, timelike=True)

cpdef get_loc(self, object val):
if is_definitely_invalid_key(val):
Expand Down
18 changes: 16 additions & 2 deletions pandas/src/generate_code.py
Original file line number Diff line number Diff line change
Expand Up @@ -539,7 +539,7 @@ def diff_2d_%(name)s(ndarray[%(c_type)s, ndim=2] arr,

is_monotonic_template = """@cython.boundscheck(False)
@cython.wraparound(False)
def is_monotonic_%(name)s(ndarray[%(c_type)s] arr):
def is_monotonic_%(name)s(ndarray[%(c_type)s] arr, bint timelike):
'''
Returns
-------
Expand All @@ -554,18 +554,32 @@ def is_monotonic_%(name)s(ndarray[%(c_type)s] arr):
n = len(arr)
if n < 2:
if n == 1:
if arr[0] != arr[0] or (timelike and arr[0] == iNaT):
# single value is NaN
return False, False, True
else:
return True, True, True
elif n < 2:
return True, True, True
if timelike and arr[0] == iNaT:
return False, False, None
prev = arr[0]
for i in range(1, n):
cur = arr[i]
if timelike and cur == iNaT:
return False, False, None
if cur < prev:
is_monotonic_inc = 0
elif cur > prev:
is_monotonic_dec = 0
elif cur == prev:
is_unique = 0
else:
# cur or prev is NaN
return False, False, None
if not is_monotonic_inc and not is_monotonic_dec:
return False, False, None
prev = cur
Expand Down
Loading

0 comments on commit db6f8fd

Please sign in to comment.