diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index ffbee0bf21a66..af2a5579bf1cd 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -188,21 +188,33 @@ class providing the base-class of operations. >>> df = pd.DataFrame({'A': 'a a b'.split(), ... 'B': [1,2,3], ... 'C': [4,6,5]}) - >>> g = df.groupby('A') + >>> g1 = df.groupby('A', group_keys=False) + >>> g2 = df.groupby('A', group_keys=True) - Notice that ``g`` has two groups, ``a`` and ``b``. - Calling `apply` in various ways, we can get different grouping results: + Notice that ``g1`` and ``g2`` have two groups, ``a`` and ``b``, and only + differ in their ``group_keys`` argument. Calling `apply` in various ways, + we can get different grouping results: Example 1: below the function passed to `apply` takes a DataFrame as its argument and returns a DataFrame. `apply` combines the result for each group together into a new DataFrame: - >>> g[['B', 'C']].apply(lambda x: x / x.sum()) + >>> g1[['B', 'C']].apply(lambda x: x / x.sum()) B C 0 0.333333 0.4 1 0.666667 0.6 2 1.000000 1.0 + In the above, the groups are not part of the index. We can have them included + by using ``g2`` where ``group_keys=True``: + + >>> g2[['B', 'C']].apply(lambda x: x / x.sum()) + B C + A + a 0 0.333333 0.4 + 1 0.666667 0.6 + b 2 1.000000 1.0 + Example 2: The function passed to `apply` takes a DataFrame as its argument and returns a Series. `apply` combines the result for each group together into a new DataFrame. @@ -211,28 +223,41 @@ class providing the base-class of operations. The resulting dtype will reflect the return value of the passed ``func``. 
- >>> g[['B', 'C']].apply(lambda x: x.astype(float).max() - x.min()) + >>> g1[['B', 'C']].apply(lambda x: x.astype(float).max() - x.min()) + B C + A + a 1.0 2.0 + b 0.0 0.0 + + >>> g2[['B', 'C']].apply(lambda x: x.astype(float).max() - x.min()) B C A a 1.0 2.0 b 0.0 0.0 + The ``group_keys`` argument has no effect here because the result is not + like-indexed (i.e. :ref:`a transform <groupby.transform>`) when compared + to the input. + Example 3: The function passed to `apply` takes a DataFrame as its argument and returns a scalar. `apply` combines the result for each group together into a Series, including setting the index as appropriate: - >>> g.apply(lambda x: x.C.max() - x.B.min()) + >>> g1.apply(lambda x: x.C.max() - x.B.min()) A a 5 b 2 dtype: int64""", "series_examples": """ >>> s = pd.Series([0, 1, 2], index='a a b'.split()) - >>> g = s.groupby(s.index) + >>> g1 = s.groupby(s.index, group_keys=False) + >>> g2 = s.groupby(s.index, group_keys=True) From ``s`` above we can see that ``g`` has two groups, ``a`` and ``b``. - Calling `apply` in various ways, we can get different grouping results: + Notice that ``g1`` and ``g2`` have two groups, ``a`` and ``b``, and only + differ in their ``group_keys`` argument. Calling `apply` in various ways, + we can get different grouping results: Example 1: The function passed to `apply` takes a Series as its argument and returns a Series. `apply` combines the result for @@ -242,18 +267,36 @@ class providing the base-class of operations. The resulting dtype will reflect the return value of the passed ``func``. - >>> g.apply(lambda x: x*2 if x.name == 'a' else x/2) + >>> g1.apply(lambda x: x*2 if x.name == 'a' else x/2) a 0.0 a 2.0 b 1.0 dtype: float64 + In the above, the groups are not part of the index. 
We can have them included + by using ``g2`` where ``group_keys=True``: + + >>> g2.apply(lambda x: x*2 if x.name == 'a' else x/2) + a a 0.0 + a 2.0 + b b 1.0 + dtype: float64 + Example 2: The function passed to `apply` takes a Series as its argument and returns a scalar. `apply` combines the result for each group together into a Series, including setting the index as appropriate: - >>> g.apply(lambda x: x.max() - x.min()) + >>> g1.apply(lambda x: x.max() - x.min()) + a 1 + b 0 + dtype: int64 + + The ``group_keys`` argument has no effect here because the result is not + like-indexed (i.e. :ref:`a transform <groupby.transform>`) when compared + to the input. + + >>> g2.apply(lambda x: x.max() - x.min()) a 1 b 0 dtype: int64""", diff --git a/pandas/core/shared_docs.py b/pandas/core/shared_docs.py index 3750a8a3ceed9..3a8a95865d10e 100644 --- a/pandas/core/shared_docs.py +++ b/pandas/core/shared_docs.py @@ -115,9 +115,12 @@ Note this does not influence the order of observations within each group. Groupby preserves the order of rows within each group. group_keys : bool, optional - When calling apply, add group keys to index to identify pieces. - By default group keys are not included when the result's index - (and column) labels match the inputs, and are included otherwise. + When calling apply and the ``by`` argument produces a like-indexed + (i.e. :ref:`a transform <groupby.transform>`) result, add group keys to + index to identify pieces. By default group keys are not included + when the result's index (and column) labels match the inputs, and + are included otherwise. This argument has no effect if the result produced + is not like-indexed with respect to the input. .. versionchanged:: 1.5.0