Skip to content

Commit

Permalink
BUG: Bug in multi-index slicing with various edge cases (GH8132)
Browse files Browse the repository at this point in the history
  • Loading branch information
jreback committed Aug 28, 2014
1 parent 1e3da90 commit 21ccaf2
Show file tree
Hide file tree
Showing 3 changed files with 94 additions and 11 deletions.
1 change: 1 addition & 0 deletions doc/source/v0.15.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -532,6 +532,7 @@ Bug Fixes
- Bug in adding and subtracting ``PeriodIndex`` with ``PeriodIndex`` raise ``TypeError`` (:issue:`7741`)
- Bug in ``combine_first`` with ``PeriodIndex`` data raises ``TypeError`` (:issue:`3367`)
- Bug in multi-index slicing with missing indexers (:issue:`7866`)
- Bug in multi-index slicing with various edge cases (:issue:`8132`)
- Regression in multi-index indexing with a non-scalar type object (:issue:`7914`)
- Bug in Timestamp comparisons with ``==`` and dtype of int64 (:issue:`8058`)
- Bug where pickles containing ``DateOffset`` may raise ``AttributeError`` when the ``normalize`` attribute is referred to internally (:issue:`7748`)
Expand Down
18 changes: 15 additions & 3 deletions pandas/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -3927,9 +3927,21 @@ def _get_level_indexer(self, key, level=0):
# handle a slice, returning a slice if we can
# otherwise a boolean indexer

start = level_index.get_loc(key.start or 0)
stop = level_index.get_loc(key.stop or len(level_index)-1)
step = key.step
try:
if key.start is not None:
start = level_index.get_loc(key.start)
else:
start = 0
if key.stop is not None:
stop = level_index.get_loc(key.stop)
else:
stop = len(level_index)-1
step = key.step
except (KeyError):

# we have a partial slice (like looking up a partial date string)
start = stop = level_index.slice_indexer(key.start, key.stop, key.step)
step = start.step

if isinstance(start,slice) or isinstance(stop,slice):
# we have a slice for start and/or stop
Expand Down
86 changes: 78 additions & 8 deletions pandas/tests/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1664,6 +1664,76 @@ def test_multiindex_slicers_datetimelike(self):
result = df.loc[(idx['2012-01-01 12:12:12':'2012-01-03 12:12:12'],1), idx['A','B']]
assert_frame_equal(result,expected)


def test_multiindex_slicers_edges(self):
    """Check ``.loc`` label slicing on a MultiIndex for assorted edge cases.

    Regression test for GH 8132. A 15-row frame is indexed by the three
    levels ``('A', 'B', 'DATE')`` and sorted; each case below slices one or
    more levels with a ``slice`` object and compares the result with the
    rows picked positionally through ``iloc``.
    """
    df = DataFrame({'A': ['A0'] * 5 + ['A1'] * 5 + ['A2'] * 5,
                    'B': ['B0', 'B0', 'B1', 'B1', 'B2'] * 3,
                    'DATE': ["2013-06-11",
                             "2013-07-02",
                             "2013-07-09",
                             "2013-07-30",
                             "2013-08-06",
                             "2013-06-11",
                             "2013-07-02",
                             "2013-07-09",
                             "2013-07-30",
                             "2013-08-06",
                             "2013-09-03",
                             "2013-10-01",
                             "2013-07-09",
                             "2013-08-06",
                             "2013-09-03"],
                    'VALUES': [22, 35, 14, 9, 4, 40, 18, 4, 2, 5,
                               1, 2, 3, 4, 2]})

    df['DATE'] = pd.to_datetime(df['DATE'])

    # the positional ``iloc`` expectations below refer to the sorted frame
    df1 = df.set_index(['A', 'B', 'DATE'])
    df1 = df1.sortlevel()

    # A1 - Get all values under "A0" and "A1"
    result = df1.loc[(slice('A1')), :]
    expected = df1.iloc[0:10]
    assert_frame_equal(result, expected)

    # A2 - Get all values from the start to "A2"
    result = df1.loc[(slice('A2')), :]
    expected = df1
    assert_frame_equal(result, expected)

    # A3 - Get all values under "B1" or "B2"
    result = df1.loc[(slice(None), slice('B1', 'B2')), :]
    expected = df1.iloc[[2, 3, 4, 7, 8, 9, 12, 13, 14]]
    assert_frame_equal(result, expected)

    # A4 - Get all values between 2013-07-02 and 2013-07-09
    result = df1.loc[(slice(None), slice(None),
                      slice('20130702', '20130709')), :]
    expected = df1.iloc[[1, 2, 6, 7, 12]]
    assert_frame_equal(result, expected)

    # B1 - Get all values in B0 that are also under A0, A1 and A2
    result = df1.loc[(slice('A2'), slice('B0')), :]
    expected = df1.iloc[[0, 1, 5, 6, 10, 11]]
    assert_frame_equal(result, expected)

    # B2 - Get all values in B0, B1 and B2
    #      (similar to what case A2 is doing for the As)
    result = df1.loc[(slice(None), slice('B2')), :]
    expected = df1
    assert_frame_equal(result, expected)

    # B3 - Get all values from B1 to B2 and up to 2013-08-06
    result = df1.loc[(slice(None), slice('B1', 'B2'),
                      slice('2013-08-06')), :]
    expected = df1.iloc[[2, 3, 4, 7, 8, 9, 12, 13]]
    assert_frame_equal(result, expected)

    # B4 - Same as A4 but the start of the date slice is not a key.
    #      shows indexing on a partial selection slice
    result = df1.loc[(slice(None), slice(None),
                      slice('20130701', '20130709')), :]
    expected = df1.iloc[[1, 2, 6, 7, 12]]
    assert_frame_equal(result, expected)

def test_per_axis_per_level_doc_examples(self):

# test index maker
Expand Down Expand Up @@ -3831,11 +3901,11 @@ class TestSeriesNoneCoercion(tm.TestCase):
# For numeric series, we should coerce to NaN.
([1, 2, 3], [np.nan, 2, 3]),
([1.0, 2.0, 3.0], [np.nan, 2.0, 3.0]),

# For datetime series, we should coerce to NaT.
([datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)],
[NaT, datetime(2000, 1, 2), datetime(2000, 1, 3)]),

# For objects, we should preserve the None value.
(["foo", "bar", "baz"], [None, "bar", "baz"]),
]
Expand All @@ -3851,7 +3921,7 @@ def test_coercion_with_setitem(self):
self.assert_numpy_array_equivalent(
start_series.values,
expected_series.values, strict_nan=True)

def test_coercion_with_loc_setitem(self):
for start_data, expected_result in self.EXPECTED_RESULTS:
start_series = Series(start_data)
Expand All @@ -3863,7 +3933,7 @@ def test_coercion_with_loc_setitem(self):
self.assert_numpy_array_equivalent(
start_series.values,
expected_series.values, strict_nan=True)

def test_coercion_with_setitem_and_series(self):
for start_data, expected_result in self.EXPECTED_RESULTS:
start_series = Series(start_data)
Expand All @@ -3875,7 +3945,7 @@ def test_coercion_with_setitem_and_series(self):
self.assert_numpy_array_equivalent(
start_series.values,
expected_series.values, strict_nan=True)

def test_coercion_with_loc_and_series(self):
for start_data, expected_result in self.EXPECTED_RESULTS:
start_series = Series(start_data)
Expand All @@ -3887,18 +3957,18 @@ def test_coercion_with_loc_and_series(self):
self.assert_numpy_array_equivalent(
start_series.values,
expected_series.values, strict_nan=True)


class TestDataframeNoneCoercion(tm.TestCase):
EXPECTED_SINGLE_ROW_RESULTS = [
# For numeric series, we should coerce to NaN.
([1, 2, 3], [np.nan, 2, 3]),
([1.0, 2.0, 3.0], [np.nan, 2.0, 3.0]),

# For datetime series, we should coerce to NaT.
([datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)],
[NaT, datetime(2000, 1, 2), datetime(2000, 1, 3)]),

# For objects, we should preserve the None value.
(["foo", "bar", "baz"], [None, "bar", "baz"]),
]
Expand Down

0 comments on commit 21ccaf2

Please sign in to comment.