From 5511af08e9bd8b0610d3c68d98a9c781460f6e4e Mon Sep 17 00:00:00 2001
From: richard <rhshadrach@gmail.com>
Date: Mon, 30 May 2022 23:32:31 -0400
Subject: [PATCH 1/2] DOC: Add to docs on group_keys in groupby.apply

---
 pandas/core/groupby/groupby.py | 61 ++++++++++++++++++++++++++++------
 pandas/core/shared_docs.py     |  8 +++--
 2 files changed, 56 insertions(+), 13 deletions(-)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index ffbee0bf21a66..5ce0657ca7d47 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -188,21 +188,33 @@ class providing the base-class of operations.
     >>> df = pd.DataFrame({'A': 'a a b'.split(),
     ...                    'B': [1,2,3],
     ...                    'C': [4,6,5]})
-    >>> g = df.groupby('A')
+    >>> g1 = df.groupby('A', group_keys=False)
+    >>> g2 = df.groupby('A', group_keys=True)
 
-    Notice that ``g`` has two groups, ``a`` and ``b``.
-    Calling `apply` in various ways, we can get different grouping results:
+    Notice that ``g1`` and ``g2`` have two groups, ``a`` and ``b``, and only
+    differ in their ``group_keys`` argument. Calling `apply` in various ways,
+    we can get different grouping results:
 
     Example 1: below the function passed to `apply` takes a DataFrame as
     its argument and returns a DataFrame. `apply` combines the result for
     each group together into a new DataFrame:
 
-    >>> g[['B', 'C']].apply(lambda x: x / x.sum())
+    >>> g1[['B', 'C']].apply(lambda x: x / x.sum())
               B    C
     0  0.333333  0.4
     1  0.666667  0.6
     2  1.000000  1.0
 
+    In the above, the groups are not part of the index. We can have them included
+    by using ``g2`` where ``group_keys=True``:
+
+    >>> g2[['B', 'C']].apply(lambda x: x / x.sum())
+                B    C
+    A
+    a 0  0.333333  0.4
+      1  0.666667  0.6
+    b 2  1.000000  1.0
+
     Example 2: The function passed to `apply` takes a DataFrame as
     its argument and returns a Series.  `apply` combines the result for
     each group together into a new DataFrame.
@@ -211,28 +223,40 @@ class providing the base-class of operations.
 
         The resulting dtype will reflect the return value of the passed ``func``.
 
-    >>> g[['B', 'C']].apply(lambda x: x.astype(float).max() - x.min())
+    >>> g1[['B', 'C']].apply(lambda x: x.astype(float).max() - x.min())
+         B    C
+    A
+    a  1.0  2.0
+    b  0.0  0.0
+
+    >>> g2[['B', 'C']].apply(lambda x: x.astype(float).max() - x.min())
          B    C
     A
     a  1.0  2.0
     b  0.0  0.0
 
+    The ``group_keys`` argument has no effect here because the result is not
+    like-indexed when compared to the input.
+
     Example 3: The function passed to `apply` takes a DataFrame as
     its argument and returns a scalar. `apply` combines the result for
     each group together into a Series, including setting the index as
     appropriate:
 
-    >>> g.apply(lambda x: x.C.max() - x.B.min())
+    >>> g1.apply(lambda x: x.C.max() - x.B.min())
     A
     a    5
     b    2
     dtype: int64""",
     "series_examples": """
     >>> s = pd.Series([0, 1, 2], index='a a b'.split())
-    >>> g = s.groupby(s.index)
+    >>> g1 = s.groupby(s.index, group_keys=False)
+    >>> g2 = s.groupby(s.index, group_keys=True)
 
     From ``s`` above we can see that ``g`` has two groups, ``a`` and ``b``.
-    Calling `apply` in various ways, we can get different grouping results:
+    Notice that ``g1`` and ``g2`` have two groups, ``a`` and ``b``, and only
+    differ in their ``group_keys`` argument. Calling `apply` in various ways,
+    we can get different grouping results:
 
     Example 1: The function passed to `apply` takes a Series as
     its argument and returns a Series.  `apply` combines the result for
@@ -242,18 +266,35 @@ class providing the base-class of operations.
 
         The resulting dtype will reflect the return value of the passed ``func``.
 
-    >>> g.apply(lambda x: x*2 if x.name == 'a' else x/2)
+    >>> g1.apply(lambda x: x*2 if x.name == 'a' else x/2)
     a    0.0
     a    2.0
     b    1.0
     dtype: float64
 
+    In the above, the groups are not part of the index. We can have them included
+    by using ``g2`` where ``group_keys=True``:
+
+    >>> g2.apply(lambda x: x*2 if x.name == 'a' else x/2)
+    a  a    0.0
+       a    2.0
+    b  b    1.0
+    dtype: float64
+
     Example 2: The function passed to `apply` takes a Series as
     its argument and returns a scalar. `apply` combines the result for
     each group together into a Series, including setting the index as
     appropriate:
 
-    >>> g.apply(lambda x: x.max() - x.min())
+    >>> g1.apply(lambda x: x.max() - x.min())
+    a    1
+    b    0
+    dtype: int64
+
+    The ``group_keys`` argument has no effect here because the result is not
+    like-indexed when compared to the input.
+
+    >>> g2.apply(lambda x: x.max() - x.min())
     a    1
     b    0
     dtype: int64""",
diff --git a/pandas/core/shared_docs.py b/pandas/core/shared_docs.py
index 3750a8a3ceed9..bc9c7764d094a 100644
--- a/pandas/core/shared_docs.py
+++ b/pandas/core/shared_docs.py
@@ -115,9 +115,11 @@
     Note this does not influence the order of observations within each
     group. Groupby preserves the order of rows within each group.
 group_keys : bool, optional
-    When calling apply, add group keys to index to identify pieces.
-    By default group keys are not included when the result's index
-    (and column) labels match the inputs, and are included otherwise.
+    When calling apply and the ``by`` argument produces a like-indexed (transformed)
+    result, add group keys to index to identify pieces. By default group keys are not
+    included when the result's index (and column) labels match the inputs, and
+    are included otherwise. This argument has no effect if the result produced
+    is not like-indexed with respect to the input.
 
     .. versionchanged:: 1.5.0
 

From ea190c86c6636e1550eeccd04c6c9922c1ee9110 Mon Sep 17 00:00:00 2001
From: richard <rhshadrach@gmail.com>
Date: Wed, 1 Jun 2022 21:35:11 -0400
Subject: [PATCH 2/2] Add link to user guide

---
 pandas/core/groupby/groupby.py | 6 ++++--
 pandas/core/shared_docs.py     | 7 ++++---
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 5ce0657ca7d47..af2a5579bf1cd 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -236,7 +236,8 @@ class providing the base-class of operations.
     b  0.0  0.0
 
     The ``group_keys`` argument has no effect here because the result is not
-    like-indexed when compared to the input.
+    like-indexed (i.e. :ref:`a transform <groupby.transform>`) when compared
+    to the input.
 
     Example 3: The function passed to `apply` takes a DataFrame as
     its argument and returns a scalar. `apply` combines the result for
@@ -292,7 +293,8 @@ class providing the base-class of operations.
     dtype: int64
 
     The ``group_keys`` argument has no effect here because the result is not
-    like-indexed when compared to the input.
+    like-indexed (i.e. :ref:`a transform <groupby.transform>`) when compared
+    to the input.
 
     >>> g2.apply(lambda x: x.max() - x.min())
     a    1
diff --git a/pandas/core/shared_docs.py b/pandas/core/shared_docs.py
index bc9c7764d094a..3a8a95865d10e 100644
--- a/pandas/core/shared_docs.py
+++ b/pandas/core/shared_docs.py
@@ -115,9 +115,10 @@
     Note this does not influence the order of observations within each
     group. Groupby preserves the order of rows within each group.
 group_keys : bool, optional
-    When calling apply and the ``by`` argument produces a like-indexed (transformed)
-    result, add group keys to index to identify pieces. By default group keys are not
-    included when the result's index (and column) labels match the inputs, and
+    When calling apply and the ``by`` argument produces a like-indexed
+    (i.e. :ref:`a transform <groupby.transform>`) result, add group keys to
+    index to identify pieces. By default group keys are not included
+    when the result's index (and column) labels match the inputs, and
     are included otherwise. This argument has no effect if the result produced
     is not like-indexed with respect to the input.