ENH: support monotonic decreasing indexes in Index.slice_locs

Also: an index containing NaN or NaT is now reported as neither monotonic increasing nor monotonic decreasing.
wavexx · Nov 2, 2014 · db6f8fd · db6f8fd
1 parent a5915f7
commit db6f8fd
Show file tree

Hide file tree

Showing 11 changed files with 231 additions and 62 deletions.
diff --git a/doc/source/api.rst b/doc/source/api.rst
@@ -1166,6 +1166,8 @@ Attributes
 
    Index.values
    Index.is_monotonic
+   Index.is_monotonic_increasing
+   Index.is_monotonic_decreasing
    Index.is_unique
    Index.dtype
    Index.inferred_type

diff --git a/doc/source/whatsnew/v0.15.1.txt b/doc/source/whatsnew/v0.15.1.txt
@@ -146,6 +146,29 @@ API changes
 
      s.dt.hour
 
+- Added support for slicing with monotonic decreasing indexes, even if ``start`` or ``stop`` is
+  not found in the index (:issue:`7860`):
+
+  .. ipython:: python
+
+    s = pd.Series(['a', 'b', 'c', 'd'], [4, 3, 2, 1])
+    s
+
+  previous behavior:
+
+  .. code-block:: python
+
+    In [8]: s.loc[3.5:1.5]
+    KeyError: 3.5
+
+  current behavior:
+
+  .. ipython:: python
+
+    s.loc[3.5:1.5]
+
+- Added Index properties ``is_monotonic_increasing`` and ``is_monotonic_decreasing`` (:issue:`8680`).
+
 .. _whatsnew_0151.enhancements:
 
 Enhancements
@@ -208,8 +231,9 @@ Bug Fixes
 - Bug in ix/loc block splitting on setitem (manifests with integer-like dtypes, e.g. datetime64) (:issue:`8607`)
 
 
-
-
+- Bug when doing label-based indexing with integers not found in the index for
+  non-unique but monotonic indexes (:issue:`8680`).
+- Bug when indexing a Float64Index with ``np.nan`` on numpy 1.7 (:issue:`8980`).
 
 
 

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -1461,7 +1461,7 @@ def xs(self, key, axis=0, level=None, copy=None, drop_level=True):
                             name=self.index[loc])
 
         else:
-            result = self[loc]
+            result = self.iloc[loc]
             result.index = new_index
 
         # this could be a view

diff --git a/pandas/core/index.py b/pandas/core/index.py
@@ -2002,16 +2002,12 @@ def _get_slice(starting_value, offset, search_side, slice_property,
                     slc += offset
 
             except KeyError:
-                if self.is_monotonic:
-
-                    # we are duplicated but non-unique
-                    # so if we have an indexer then we are done
-                    # else search for it (GH 7523)
-                    if not is_unique and is_integer(search_value):
-                        slc = search_value
-                    else:
-                        slc = self.searchsorted(search_value,
-                                                side=search_side)
+                if self.is_monotonic_increasing:
+                    slc = self.searchsorted(search_value, side=search_side)
+                elif self.is_monotonic_decreasing:
+                    search_side = 'right' if search_side == 'left' else 'left'
+                    slc = len(self) - self[::-1].searchsorted(search_value,
+                                                              side=search_side)
                 else:
                     raise
             return slc
@@ -2445,10 +2441,13 @@ def __contains__(self, other):
     def get_loc(self, key):
         try:
             if np.all(np.isnan(key)):
+                nan_idxs = self._nan_idxs
                 try:
-                    return self._nan_idxs.item()
-                except ValueError:
-                    return self._nan_idxs
+                    return nan_idxs.item()
+                except (ValueError, IndexError):
+                    # should only need to catch ValueError here but on numpy
+                    # 1.7 .item() can raise IndexError when NaNs are present
+                    return nan_idxs
         except (TypeError, NotImplementedError):
             pass
         return super(Float64Index, self).get_loc(key)

diff --git a/pandas/index.pyx b/pandas/index.pyx
@@ -356,7 +356,7 @@ cdef class Int64Engine(IndexEngine):
         return _hash.Int64HashTable(n)
 
     def _call_monotonic(self, values):
-        return algos.is_monotonic_int64(values)
+        return algos.is_monotonic_int64(values, timelike=False)
 
     def get_pad_indexer(self, other, limit=None):
         return algos.pad_int64(self._get_index_values(), other,
@@ -446,7 +446,7 @@ cdef class Float64Engine(IndexEngine):
         return result
 
     def _call_monotonic(self, values):
-        return algos.is_monotonic_float64(values)
+        return algos.is_monotonic_float64(values, timelike=False)
 
     def get_pad_indexer(self, other, limit=None):
         return algos.pad_float64(self._get_index_values(), other,
@@ -500,7 +500,7 @@ cdef class ObjectEngine(IndexEngine):
         return _hash.PyObjectHashTable(n)
 
     def _call_monotonic(self, values):
-        return algos.is_monotonic_object(values)
+        return algos.is_monotonic_object(values, timelike=False)
 
     def get_pad_indexer(self, other, limit=None):
         return algos.pad_object(self._get_index_values(), other,
@@ -532,7 +532,7 @@ cdef class DatetimeEngine(Int64Engine):
         return self.vgetter().view('i8')
 
     def _call_monotonic(self, values):
-        return algos.is_monotonic_int64(values)
+        return algos.is_monotonic_int64(values, timelike=True)
 
     cpdef get_loc(self, object val):
         if is_definitely_invalid_key(val):

diff --git a/pandas/src/generate_code.py b/pandas/src/generate_code.py
@@ -539,7 +539,7 @@ def diff_2d_%(name)s(ndarray[%(c_type)s, ndim=2] arr,
 
 is_monotonic_template = """@cython.boundscheck(False)
 @cython.wraparound(False)
-def is_monotonic_%(name)s(ndarray[%(c_type)s] arr):
+def is_monotonic_%(name)s(ndarray[%(c_type)s] arr, bint timelike):
     '''
     Returns
     -------
@@ -554,18 +554,32 @@ def is_monotonic_%(name)s(ndarray[%(c_type)s] arr):
 
     n = len(arr)
 
-    if n < 2:
+    if n == 1:
+        if arr[0] != arr[0] or (timelike and arr[0] == iNaT):
+            # single value is NaN
+            return False, False, True
+        else:
+            return True, True, True
+    elif n < 2:
         return True, True, True
 
+    if timelike and arr[0] == iNaT:
+        return False, False, None
+
     prev = arr[0]
     for i in range(1, n):
         cur = arr[i]
+        if timelike and cur == iNaT:
+            return False, False, None
         if cur < prev:
             is_monotonic_inc = 0
         elif cur > prev:
             is_monotonic_dec = 0
         elif cur == prev:
             is_unique = 0
+        else:
+            # cur or prev is NaN
+            return False, False, None
         if not is_monotonic_inc and not is_monotonic_dec:
             return False, False, None
         prev = cur