diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index ffbee0bf21a66..af2a5579bf1cd 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -188,21 +188,33 @@ class providing the base-class of operations.
     >>> df = pd.DataFrame({'A': 'a a b'.split(),
     ...                    'B': [1,2,3],
     ...                    'C': [4,6,5]})
-    >>> g = df.groupby('A')
+    >>> g1 = df.groupby('A', group_keys=False)
+    >>> g2 = df.groupby('A', group_keys=True)
 
-    Notice that ``g`` has two groups, ``a`` and ``b``.
-    Calling `apply` in various ways, we can get different grouping results:
+    Notice that ``g1`` have ``g2`` have two groups, ``a`` and ``b``, and only
+    differ in their ``group_keys`` argument. Calling `apply` in various ways,
+    we can get different grouping results:
 
     Example 1: below the function passed to `apply` takes a DataFrame as
     its argument and returns a DataFrame. `apply` combines the result for
     each group together into a new DataFrame:
 
-    >>> g[['B', 'C']].apply(lambda x: x / x.sum())
+    >>> g1[['B', 'C']].apply(lambda x: x / x.sum())
               B    C
     0  0.333333  0.4
     1  0.666667  0.6
     2  1.000000  1.0
 
+    In the above, the groups are not part of the index. We can have them included
+    by using ``g2`` where ``group_keys=True``:
+
+    >>> g2[['B', 'C']].apply(lambda x: x / x.sum())
+                B    C
+    A
+    a 0  0.333333  0.4
+      1  0.666667  0.6
+    b 2  1.000000  1.0
+
     Example 2: The function passed to `apply` takes a DataFrame as
     its argument and returns a Series.  `apply` combines the result for
     each group together into a new DataFrame.
@@ -211,28 +223,41 @@ class providing the base-class of operations.
 
         The resulting dtype will reflect the return value of the passed ``func``.
 
-    >>> g[['B', 'C']].apply(lambda x: x.astype(float).max() - x.min())
+    >>> g1[['B', 'C']].apply(lambda x: x.astype(float).max() - x.min())
+         B    C
+    A
+    a  1.0  2.0
+    b  0.0  0.0
+
+    >>> g2[['B', 'C']].apply(lambda x: x.astype(float).max() - x.min())
          B    C
     A
     a  1.0  2.0
     b  0.0  0.0
 
+    The ``group_keys`` argument has no effect here because the result is not
+    like-indexed (i.e. :ref:`a transform <groupby.transform>`) when compared
+    to the input.
+
     Example 3: The function passed to `apply` takes a DataFrame as
     its argument and returns a scalar. `apply` combines the result for
     each group together into a Series, including setting the index as
     appropriate:
 
-    >>> g.apply(lambda x: x.C.max() - x.B.min())
+    >>> g1.apply(lambda x: x.C.max() - x.B.min())
     A
     a    5
     b    2
     dtype: int64""",
     "series_examples": """
     >>> s = pd.Series([0, 1, 2], index='a a b'.split())
-    >>> g = s.groupby(s.index)
+    >>> g1 = s.groupby(s.index, group_keys=False)
+    >>> g2 = s.groupby(s.index, group_keys=True)
 
     From ``s`` above we can see that ``g`` has two groups, ``a`` and ``b``.
-    Calling `apply` in various ways, we can get different grouping results:
+    Notice that ``g1`` have ``g2`` have two groups, ``a`` and ``b``, and only
+    differ in their ``group_keys`` argument. Calling `apply` in various ways,
+    we can get different grouping results:
 
     Example 1: The function passed to `apply` takes a Series as
     its argument and returns a Series.  `apply` combines the result for
@@ -242,18 +267,36 @@ class providing the base-class of operations.
 
         The resulting dtype will reflect the return value of the passed ``func``.
 
-    >>> g.apply(lambda x: x*2 if x.name == 'a' else x/2)
+    >>> g1.apply(lambda x: x*2 if x.name == 'a' else x/2)
     a    0.0
     a    2.0
     b    1.0
     dtype: float64
 
+    In the above, the groups are not part of the index. We can have them included
+    by using ``g2`` where ``group_keys=True``:
+
+    >>> g2.apply(lambda x: x*2 if x.name == 'a' else x/2)
+    a  a    0.0
+       a    2.0
+    b  b    1.0
+    dtype: float64
+
     Example 2: The function passed to `apply` takes a Series as
     its argument and returns a scalar. `apply` combines the result for
     each group together into a Series, including setting the index as
     appropriate:
 
-    >>> g.apply(lambda x: x.max() - x.min())
+    >>> g1.apply(lambda x: x.max() - x.min())
+    a    1
+    b    0
+    dtype: int64
+
+    The ``group_keys`` argument has no effect here because the result is not
+    like-indexed (i.e. :ref:`a transform <groupby.transform>`) when compared
+    to the input.
+
+    >>> g2.apply(lambda x: x.max() - x.min())
     a    1
     b    0
     dtype: int64""",
diff --git a/pandas/core/shared_docs.py b/pandas/core/shared_docs.py
index 3750a8a3ceed9..3a8a95865d10e 100644
--- a/pandas/core/shared_docs.py
+++ b/pandas/core/shared_docs.py
@@ -115,9 +115,12 @@
     Note this does not influence the order of observations within each
     group. Groupby preserves the order of rows within each group.
 group_keys : bool, optional
-    When calling apply, add group keys to index to identify pieces.
-    By default group keys are not included when the result's index
-    (and column) labels match the inputs, and are included otherwise.
+    When calling apply and the ``by`` argument produces a like-indexed
+    (i.e. :ref:`a transform <groupby.transform>`) result, add group keys to
+    index to identify pieces. By default group keys are not included
+    when the result's index (and column) labels match the inputs, and
+    are included otherwise. This argument has no effect if the result produced
+    is not like-indexed with respect to the input.
 
     .. versionchanged:: 1.5.0