From e4d9132e2ae2e1b956f74a93bd56364f04e4eba4 Mon Sep 17 00:00:00 2001
From: Gabriel Moreira <gmoreira@nvidia.com>
Date: Tue, 17 May 2022 15:40:40 -0300
Subject: [PATCH 1/2] Improved docstrings of GroupBy op to reinforce the
 required usage of dataset.shuffle_by_keys() before applying the op

---
 nvtabular/ops/groupby.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/nvtabular/ops/groupby.py b/nvtabular/ops/groupby.py
index dc315b7c4b0..14614602a01 100644
--- a/nvtabular/ops/groupby.py
+++ b/nvtabular/ops/groupby.py
@@ -34,10 +34,13 @@ class Groupby(Operator):
 
     Example usage::
 
+        groupby_cols = ['user_id', 'session_id']
+        dataset = dataset.shuffle_by_keys(keys=groupby_cols)
+
         groupby_features = [
             'user_id', 'session_id', 'month', 'prod_id',
         ] >> ops.Groupby(
-            groupby_cols=['user_id', 'session_id'],
+            groupby_cols=groupby_cols,
             sort_cols=['month'],
             aggs={
                 'prod_id': 'list',
@@ -46,10 +49,15 @@ class Groupby(Operator):
         )
         processor = nvtabular.Workflow(groupby_features)
 
+        processor.fit(dataset)
+        dataset_transformed = processor.transform(dataset)
+
     Parameters
     -----------
     groupby_cols : str or list of str
         The column names to be used as groupby keys.
+        WARNING: Ensure the dataset was partitioned by those
+        groupby keys (see above an example).
     sort_cols : str or list of str
         Columns to be used to sort each partition before
         groupby aggregation is performed. If this argument

From 3dbcf74005b9b20feba97db1ff97475afa807875 Mon Sep 17 00:00:00 2001
From: Karl Higley <kmhigley@gmail.com>
Date: Thu, 19 May 2022 21:27:50 -0400
Subject: [PATCH 2/2] Update nvtabular/ops/groupby.py

Co-authored-by: Ben Frederickson <github@benfrederickson.com>
---
 nvtabular/ops/groupby.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/nvtabular/ops/groupby.py b/nvtabular/ops/groupby.py
index 14614602a01..b65a1800e98 100644
--- a/nvtabular/ops/groupby.py
+++ b/nvtabular/ops/groupby.py
@@ -57,7 +57,7 @@ class Groupby(Operator):
     groupby_cols : str or list of str
         The column names to be used as groupby keys.
         WARNING: Ensure the dataset was partitioned by those
-        groupby keys (see above an example).
+        groupby keys (see above for an example).
     sort_cols : str or list of str
         Columns to be used to sort each partition before
         groupby aggregation is performed. If this argument