pandas-dev · WillAyd · Jul 7, 2018 · Jul 7, 2018 · Jul 7, 2018 · Jul 7, 2018
diff --git a/pandas/core/base.py b/pandas/core/base.py
@@ -364,52 +364,46 @@ def nested_renaming_depr(level=4):
                      "version"),
                     FutureWarning, stacklevel=level)
 
-            # if we have a dict of any non-scalars
-            # eg. {'A' : ['mean']}, normalize all to
-            # be list-likes
-            if any(is_aggregator(x) for x in compat.itervalues(arg)):
-                new_arg = compat.OrderedDict()
-                for k, v in compat.iteritems(arg):
-                    if not isinstance(v, (tuple, list, dict)):
-                        new_arg[k] = [v]
-                    else:
-                        new_arg[k] = v
-
-                    # the keys must be in the columns
-                    # for ndim=2, or renamers for ndim=1
-
-                    # ok for now, but deprecated
-                    # {'A': { 'ra': 'mean' }}
-                    # {'A': { 'ra': ['mean'] }}
-                    # {'ra': ['mean']}
-
-                    # not ok
-                    # {'ra' : { 'A' : 'mean' }}
-                    if isinstance(v, dict):
-                        is_nested_renamer = True
-
-                        if k not in obj.columns:
-                            msg = ('cannot perform renaming for {key} with a '
-                                   'nested dictionary').format(key=k)
-                            raise SpecificationError(msg)
-                        nested_renaming_depr(4 + (_level or 0))
-
-                    elif isinstance(obj, ABCSeries):
-                        nested_renaming_depr()
-                    elif isinstance(obj, ABCDataFrame) and \
-                            k not in obj.columns:
-                        raise KeyError(
-                            "Column '{col}' does not exist!".format(col=k))
-
-                arg = new_arg
-
-            else:
+            if any(isinstance(x, dict) for x in compat.itervalues(arg)):
                 # deprecation of renaming keys
                 # GH 15931
-                keys = list(compat.iterkeys(arg))
-                if (isinstance(obj, ABCDataFrame) and
-                        len(obj.columns.intersection(keys)) != len(keys)):
+                nested_renaming_depr()
+
+            # normalize all values to be list-likes (wrap scalars)
+            new_arg = compat.OrderedDict()
+            for k, v in compat.iteritems(arg):
+                if not isinstance(v, (tuple, list, dict)):
+                    new_arg[k] = [v]
+                else:
+                    new_arg[k] = v
+
+                # the keys must be in the columns
+                # for ndim=2, or renamers for ndim=1
+
+                # ok for now, but deprecated
+                # {'A': { 'ra': 'mean' }}
+                # {'A': { 'ra': ['mean'] }}
+                # {'ra': ['mean']}
+
+                # not ok
+                # {'ra' : { 'A' : 'mean' }}
+                if isinstance(v, dict):
+                    is_nested_renamer = True
+
+                    if k not in obj.columns:
+                        msg = ('cannot perform renaming for {key} with a '
+                               'nested dictionary').format(key=k)
+                        raise SpecificationError(msg)
+                    nested_renaming_depr(4 + (_level or 0))
+
+                elif isinstance(obj, ABCSeries):
                     nested_renaming_depr()
+                elif isinstance(obj, ABCDataFrame) and \
+                        k not in obj.columns:
+                    raise KeyError(
+                        "Column '{col}' does not exist!".format(col=k))
+
+            arg = new_arg
 
             from pandas.core.reshape.concat import concat
 
@@ -456,11 +450,6 @@ def _agg(arg, func):
                         result.update(r)
                     keys = list(compat.iterkeys(result))
 
-                else:
-
-                    if self._selection is not None:
-                        keys = None
-
             # some selection on the object
             elif self._selection is not None:
 

diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py
@@ -117,11 +117,11 @@ def test_agg_python_multiindex(mframe):
     tm.assert_frame_equal(result, expected)
 
 
-@pytest.mark.parametrize('groupbyfunc', [
-    lambda x: x.weekday(),
-    [lambda x: x.month, lambda x: x.weekday()],
+@pytest.mark.parametrize('groupbyfunc,multiple', [
+    (lambda x: x.weekday(), False),
+    ([lambda x: x.month, lambda x: x.weekday()], True),
 ])
-def test_aggregate_str_func(tsframe, groupbyfunc):
+def test_aggregate_str_func(tsframe, groupbyfunc, multiple):
     grouped = tsframe.groupby(groupbyfunc)
 
     # single series
@@ -139,10 +139,19 @@ def test_aggregate_str_func(tsframe, groupbyfunc):
                                       ['B', 'std'],
                                       ['C', 'mean'],
                                       ['D', 'sem']]))
-    expected = DataFrame(OrderedDict([['A', grouped['A'].var()],
-                                      ['B', grouped['B'].std()],
-                                      ['C', grouped['C'].mean()],
-                                      ['D', grouped['D'].sem()]]))
+
+    columns = pd.MultiIndex.from_arrays([
+        list('ABCD'), ['var', 'std', 'mean', 'sem']])
+    expected = DataFrame(list(zip(grouped['A'].var(),
+                                  grouped['B'].std(),
+                                  grouped['C'].mean(),
+                                  grouped['D'].sem())), columns=columns)
+
+    if multiple:
+        mi = pd.MultiIndex.from_product([
+            range(1, len(groupbyfunc) + 1, 1), range(5)])
+        expected = expected.set_index(mi)
+
     tm.assert_frame_equal(result, expected)
 
 
@@ -225,27 +234,24 @@ def test_more_flexible_frame_multi_function(df):
 
     exmean = grouped.agg(OrderedDict([['C', np.mean], ['D', np.mean]]))
     exstd = grouped.agg(OrderedDict([['C', np.std], ['D', np.std]]))
-
-    expected = concat([exmean, exstd], keys=['mean', 'std'], axis=1)
-    expected = expected.swaplevel(0, 1, axis=1).sort_index(level=0, axis=1)
+    expected = concat([exmean, exstd], axis=1)
+    expected = expected.sort_index(axis=1)
 
     d = OrderedDict([['C', [np.mean, np.std]], ['D', [np.mean, np.std]]])
     result = grouped.aggregate(d)
-
     tm.assert_frame_equal(result, expected)
 
     # be careful
     result = grouped.aggregate(OrderedDict([['C', np.mean],
                                             ['D', [np.mean, np.std]]]))
     expected = grouped.aggregate(OrderedDict([['C', np.mean],
                                               ['D', [np.mean, np.std]]]))
+
     tm.assert_frame_equal(result, expected)
 
-    def foo(x):
-        return np.mean(x)
 
-    def bar(x):
-        return np.std(x, ddof=1)
+def test_more_flexible_frame_mult_function_warns(df):
+    grouped = df.groupby('A')
 
     # this uses column selection & renaming
     with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
@@ -254,6 +260,12 @@ def bar(x):
                                             ['bar', np.std]])]])
         result = grouped.aggregate(d)
 
+    def foo(x):
+        return np.mean(x)
+
+    def bar(x):
+        return np.std(x, ddof=1)
+
     d = OrderedDict([['C', [np.mean]], ['D', [foo, bar]]])
     expected = grouped.aggregate(d)
 
@@ -271,18 +283,18 @@ def test_multi_function_flexible_mix(df):
     with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
         expected = grouped.aggregate(d)
 
-    # Test 1
     d = OrderedDict([['C', OrderedDict([['foo', 'mean'], ['bar', 'std']])],
-                     ['D', 'sum']])
+                     ['D', ['sum']]])
     # this uses column selection & renaming
     with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
         result = grouped.aggregate(d)
     tm.assert_frame_equal(result, expected)
 
-    # Test 2
     d = OrderedDict([['C', OrderedDict([['foo', 'mean'], ['bar', 'std']])],
-                     ['D', ['sum']]])
+                     ['D', 'sum']])
+
     # this uses column selection & renaming
     with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
         result = grouped.aggregate(d)
+
     tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/groupby/aggregate/test_other.py b/pandas/tests/groupby/aggregate/test_other.py
@@ -99,19 +99,23 @@ def test_agg_dict_parameter_cast_result_dtypes():
     grouped = df.groupby('class')
     tm.assert_frame_equal(grouped.first(), exp)
     tm.assert_frame_equal(grouped.agg('first'), exp)
-    tm.assert_frame_equal(grouped.agg({'time': 'first'}), exp)
     tm.assert_series_equal(grouped.time.first(), exp['time'])
     tm.assert_series_equal(grouped.time.agg('first'), exp['time'])
 
+    exp.columns = pd.MultiIndex.from_tuples([('time', 'first')])
+    tm.assert_frame_equal(grouped.agg({'time': 'first'}), exp)
+
     # test for `last` function
     exp = df.loc[[0, 3, 4, 7]].set_index('class')
     grouped = df.groupby('class')
     tm.assert_frame_equal(grouped.last(), exp)
     tm.assert_frame_equal(grouped.agg('last'), exp)
-    tm.assert_frame_equal(grouped.agg({'time': 'last'}), exp)
     tm.assert_series_equal(grouped.time.last(), exp['time'])
     tm.assert_series_equal(grouped.time.agg('last'), exp['time'])
 
+    exp.columns = pd.MultiIndex.from_tuples([('time', 'last')])
+    tm.assert_frame_equal(grouped.agg({'time': 'last'}), exp)
+
     # count
     exp = pd.Series([2, 2, 2, 2],
                     index=Index(list('ABCD'), name='class'),
@@ -192,7 +196,9 @@ def test_aggregate_api_consistency():
     tm.assert_frame_equal(result, expected, check_like=True)
 
     result = grouped.agg({'C': 'mean', 'D': 'sum'})
-    expected = pd.concat([d_sum, c_mean], axis=1)
+    expected = pd.concat([c_mean, d_sum], axis=1)
+    expected.columns = MultiIndex.from_arrays([['C', 'D'],
+                                               ['mean', 'sum']])
     tm.assert_frame_equal(result, expected, check_like=True)
 
     result = grouped.agg({'C': ['mean', 'sum'],
@@ -201,13 +207,19 @@ def test_aggregate_api_consistency():
     expected.columns = MultiIndex.from_product([['C', 'D'],
                                                 ['mean', 'sum']])
 
-    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
+def test_aggregate_api_raises():
+    df = DataFrame({'A': ['foo', 'bar', 'foo', 'bar',
+                          'foo', 'bar', 'foo', 'foo'],
+                    'B': ['one', 'one', 'two', 'two',
+                          'two', 'two', 'one', 'two'],
+                    'C': np.random.randn(8) + 1.0,
+                    'D': np.arange(8)})
+
+    grouped = df.groupby(['A', 'B'])
+
+    with pytest.raises(KeyError):
         result = grouped[['D', 'C']].agg({'r': np.sum,
                                           'r2': np.mean})
-    expected = pd.concat([d_sum, c_sum, d_mean, c_mean], axis=1)
-    expected.columns = MultiIndex.from_product([['r', 'r2'],
-                                                ['D', 'C']])
-    tm.assert_frame_equal(result, expected, check_like=True)
 
 
 def test_agg_dict_renaming_deprecation():
@@ -222,14 +234,21 @@ def test_agg_dict_renaming_deprecation():
                              'C': {'bar': ['count', 'min']}})
         assert "using a dict with renaming" in str(w[0].message)
 
-    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
-        df.groupby('A')[['B', 'C']].agg({'ma': 'max'})
-
+    # TODO: Shouldn't the below fail as well?
     with tm.assert_produces_warning(FutureWarning) as w:
         df.groupby('A').B.agg({'foo': 'count'})
         assert "using a dict on a Series for aggregation" in str(w[0].message)
 
 
+def test_agg_dict_renaming_deprecation_raises():
+    df = pd.DataFrame({'A': [1, 1, 1, 2, 2],
+                       'B': range(5),
+                       'C': range(5)})
+
+    with pytest.raises(KeyError):
+        df.groupby('A')[['B', 'C']].agg({'ma': 'max'})
+
+
 def test_agg_compat():
     # GH 12334
     df = DataFrame({'A': ['foo', 'bar', 'foo', 'bar',
@@ -267,11 +286,6 @@ def test_agg_nested_dicts():
 
     g = df.groupby(['A', 'B'])
 
-    msg = r'cannot perform renaming for r[1-2] with a nested dictionary'
-    with tm.assert_raises_regex(SpecificationError, msg):
-        g.aggregate({'r1': {'C': ['mean', 'sum']},
-                     'r2': {'D': ['mean', 'sum']}})
-
     with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
         result = g.agg({'C': {'ra': ['mean', 'std']},
                         'D': {'rb': ['mean', 'std']}})
@@ -283,6 +297,23 @@ def test_agg_nested_dicts():
          ('rb', 'mean'), ('rb', 'std')])
     tm.assert_frame_equal(result, expected, check_like=True)
 
+
+def test_agg_nested_dicts_raises():
+    # API change for disallowing these types of nested dicts
+    df = DataFrame({'A': ['foo', 'bar', 'foo', 'bar',
+                          'foo', 'bar', 'foo', 'foo'],
+                    'B': ['one', 'one', 'two', 'two',
+                          'two', 'two', 'one', 'two'],
+                    'C': np.random.randn(8) + 1.0,
+                    'D': np.arange(8)})
+
+    g = df.groupby(['A', 'B'])
+
+    msg = r'cannot perform renaming for r[1-2] with a nested dictionary'
+    with tm.assert_raises_regex(SpecificationError, msg):
+        g.aggregate({'r1': {'C': ['mean', 'sum']},
+                     'r2': {'D': ['mean', 'sum']}})
+
     # same name as the original column
     # GH9052
     with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):

diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
@@ -552,6 +552,8 @@ def test_groupby_as_index_agg(df):
     result2 = grouped.agg(OrderedDict([['C', np.mean], ['D', np.sum]]))
     expected2 = grouped.mean()
     expected2['D'] = grouped.sum()['D']
+    expected2.columns = pd.MultiIndex.from_arrays([
+        expected2.columns, ['', 'mean', 'sum']])
     assert_frame_equal(result2, expected2)
 
     grouped = df.groupby('A', as_index=True)
@@ -561,6 +563,7 @@ def test_groupby_as_index_agg(df):
     with tm.assert_produces_warning(FutureWarning,
                                     check_stacklevel=False):
         result3 = grouped['C'].agg({'Q': np.sum})
+
     assert_frame_equal(result3, expected3)
 
     # multi-key
@@ -574,10 +577,14 @@ def test_groupby_as_index_agg(df):
     result2 = grouped.agg(OrderedDict([['C', np.mean], ['D', np.sum]]))
     expected2 = grouped.mean()
     expected2['D'] = grouped.sum()['D']
+    expected2.columns = pd.MultiIndex.from_arrays([
+        expected2.columns, ['', '', 'mean', 'sum']])
     assert_frame_equal(result2, expected2)
 
     expected3 = grouped['C'].sum()
     expected3 = DataFrame(expected3).rename(columns={'C': 'Q'})
+    expected3.columns = pd.MultiIndex.from_arrays([
+        expected3.columns, ['', '', 'sum']])
     result3 = grouped['C'].agg({'Q': np.sum})
     assert_frame_equal(result3, expected3)
 
@@ -1340,6 +1347,7 @@ def test_multifunc_sum_bug():
 
     grouped = x.groupby('test')
     result = grouped.agg({'fl': 'sum', 2: 'size'})
+    result.columns = result.columns.droplevel(-1)
     assert result['fl'].dtype == np.float64
 
 
@@ -1693,3 +1701,25 @@ def test_groupby_agg_ohlc_non_first():
     result = df.groupby(pd.Grouper(freq='D')).agg(['sum', 'ohlc'])
 
     tm.assert_frame_equal(result, expected)
+
+
+@pytest.mark.parametrize("select_columns", [True, False])
+@pytest.mark.parametrize("agg_argument", [
+    {'B': 'sum', 'C': 'min'},  # Scalar result
+    {'B': 'sum', 'C': ['min']},  # Scalar and list
+    {'B': ['sum'], 'C': ['min']},  # Lists
+    {'B': {'sum': 'sum'}, 'C': {'min': 'min'}}  # deprecated call
+])
+def test_agg_dict_naming_consistency(select_columns, agg_argument):
+    df = pd.DataFrame([['foo', 1, 1], ['bar', 1, 1]], columns=['A', 'B', 'C'])
+    expected = pd.DataFrame([[1, 1], [1, 1]], index=pd.Index(
+        ['bar', 'foo'], name='A'), columns=pd.MultiIndex.from_tuples(
+            (('B', 'sum'), ('C', 'min'))))
+
+    with catch_warnings(record=True):
+        if select_columns:
+            result = df.groupby('A')[['B', 'C']].agg(agg_argument)
+        else:
+            result = df.groupby('A').agg(agg_argument)
+
+    tm.assert_frame_equal(result, expected)