From ce32778005351b478436fb1e53dfb165adaae091 Mon Sep 17 00:00:00 2001 From: Arwa Date: Thu, 24 Oct 2024 11:18:30 -0500 Subject: [PATCH 1/5] chore: add groupby.head and groupby.size methods --- .../pandas/core/groupby/__init__.py | 75 +++++++++++++++++++ 1 file changed, 75 insertions(+) diff --git a/third_party/bigframes_vendored/pandas/core/groupby/__init__.py b/third_party/bigframes_vendored/pandas/core/groupby/__init__.py index 6011dbfe5b..06cafebd3d 100644 --- a/third_party/bigframes_vendored/pandas/core/groupby/__init__.py +++ b/third_party/bigframes_vendored/pandas/core/groupby/__init__.py @@ -977,6 +977,81 @@ def expanding(self, *args, **kwargs): """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + def head(self, n: int = 5): + """ + Return first n rows of each group. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> df = bpd.DataFrame([[1, 2], [1, 4], [5, 6]], + ... columns=['A', 'B']) + >>> df.groupby('A').head(1) + A B + 0 1 2 + 2 5 6 + + Args: + n (int): + If positive: number of entries to include from start of each group. + If negative: number of entries to exclude from end of each group. + + Returns: + bigframes.pandas.DataFrame or bigframes.pandas.Series: + First n rows of the original DataFrame or Series + + """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + + def size(self): + """ + Compute group sizes. + + **Examples:** + + For SeriesGroupBy: + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> lst = ['a', 'a', 'b'] + >>> ser = bpd.Series([1, 2, 3], index=lst) + >>> ser + a 1 + a 2 + b 3 + dtype: int64 + >>> ser.groupby(level=0).size() + a 2 + b 1 + dtype: int64 + + For DataFrameGroupBy: + + >>> data = [[1, 2, 3], [1, 5, 6], [7, 8, 9]] + >>> df = pd.DataFrame(data, columns=["a", "b", "c"], + ... 
index=["owl", "toucan", "eagle"]) + >>> df + a b c + owl 1 2 3 + toucan 1 5 6 + eagle 7 8 9 + >>> df.groupby("a").size() + a + 1 2 + 7 1 + dtype: int64 + + Returns: + bigframes.pandas.DataFrame or bigframes.pandas.Series: + Number of rows in each group as a Series if as_index is True + or a DataFrame if as_index is False. + + """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + class SeriesGroupBy(GroupBy): def agg(self, func): From cb2abbf4565fa9e613e49af811ae785662383b7a Mon Sep 17 00:00:00 2001 From: Arwa Date: Wed, 30 Oct 2024 15:45:46 -0500 Subject: [PATCH 2/5] Fix failing doctest --- .../bigframes_vendored/pandas/core/groupby/__init__.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/third_party/bigframes_vendored/pandas/core/groupby/__init__.py b/third_party/bigframes_vendored/pandas/core/groupby/__init__.py index 06cafebd3d..75252b4f52 100644 --- a/third_party/bigframes_vendored/pandas/core/groupby/__init__.py +++ b/third_party/bigframes_vendored/pandas/core/groupby/__init__.py @@ -992,6 +992,7 @@ def head(self, n: int = 5): A B 0 1 2 2 5 6 + [2 rows x 2 columns] Args: n (int): @@ -1022,11 +1023,11 @@ def size(self): a 1 a 2 b 3 - dtype: int64 + dtype: Int64 >>> ser.groupby(level=0).size() a 2 b 1 - dtype: int64 + dtype: Int64 For DataFrameGroupBy: From 86b1881d34d09b8b4468600892652aa9204d0206 Mon Sep 17 00:00:00 2001 From: Arwa Date: Thu, 31 Oct 2024 13:57:03 -0500 Subject: [PATCH 3/5] Fix doctest error --- .../bigframes_vendored/pandas/core/groupby/__init__.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/third_party/bigframes_vendored/pandas/core/groupby/__init__.py b/third_party/bigframes_vendored/pandas/core/groupby/__init__.py index 75252b4f52..06f2ce094c 100644 --- a/third_party/bigframes_vendored/pandas/core/groupby/__init__.py +++ b/third_party/bigframes_vendored/pandas/core/groupby/__init__.py @@ -1032,18 +1032,19 @@ def size(self): For DataFrameGroupBy: >>> data = [[1, 2, 3], 
[1, 5, 6], [7, 8, 9]] - >>> df = pd.DataFrame(data, columns=["a", "b", "c"], + >>> df = bpd.DataFrame(data, columns=["a", "b", "c"], ... index=["owl", "toucan", "eagle"]) >>> df a b c owl 1 2 3 toucan 1 5 6 eagle 7 8 9 + [3 rows x 3 columns in total] >>> df.groupby("a").size() a 1 2 7 1 - dtype: int64 + dtype: Int64 Returns: bigframes.pandas.DataFrame or bigframes.pandas.Series: From 1d2028949bd9bf7fae68f856046c545c2c1fe3e9 Mon Sep 17 00:00:00 2001 From: Arwa Date: Thu, 31 Oct 2024 15:25:12 -0500 Subject: [PATCH 4/5] Fix doctest error --- third_party/bigframes_vendored/pandas/core/groupby/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/third_party/bigframes_vendored/pandas/core/groupby/__init__.py b/third_party/bigframes_vendored/pandas/core/groupby/__init__.py index 26c9d08af1..2362567a33 100644 --- a/third_party/bigframes_vendored/pandas/core/groupby/__init__.py +++ b/third_party/bigframes_vendored/pandas/core/groupby/__init__.py @@ -1059,7 +1059,7 @@ def size(self): owl 1 2 3 toucan 1 5 6 eagle 7 8 9 - [3 rows x 3 columns in total] + [3 rows x 3 columns] >>> df.groupby("a").size() a 1 2 From 9575f79a4b513e71e250b715d23e9b4930329be5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Tue, 12 Nov 2024 11:09:42 -0600 Subject: [PATCH 5/5] Update third_party/bigframes_vendored/pandas/core/groupby/__init__.py --- .../pandas/core/groupby/__init__.py | 74 +++++++++---------- 1 file changed, 37 insertions(+), 37 deletions(-) diff --git a/third_party/bigframes_vendored/pandas/core/groupby/__init__.py b/third_party/bigframes_vendored/pandas/core/groupby/__init__.py index 2362567a33..1e30d827ca 100644 --- a/third_party/bigframes_vendored/pandas/core/groupby/__init__.py +++ b/third_party/bigframes_vendored/pandas/core/groupby/__init__.py @@ -1028,43 +1028,43 @@ def head(self, n: int = 5): def size(self): """ - Compute group sizes. 
- - **Examples:** - - For SeriesGroupBy: - - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None - - >>> lst = ['a', 'a', 'b'] - >>> ser = bpd.Series([1, 2, 3], index=lst) - >>> ser - a 1 - a 2 - b 3 - dtype: Int64 - >>> ser.groupby(level=0).size() - a 2 - b 1 - dtype: Int64 - - For DataFrameGroupBy: - - >>> data = [[1, 2, 3], [1, 5, 6], [7, 8, 9]] - >>> df = bpd.DataFrame(data, columns=["a", "b", "c"], - ... index=["owl", "toucan", "eagle"]) - >>> df - a b c - owl 1 2 3 - toucan 1 5 6 - eagle 7 8 9 - [3 rows x 3 columns] - >>> df.groupby("a").size() - a - 1 2 - 7 1 - dtype: Int64 + Compute group sizes. + + **Examples:** + + For SeriesGroupBy: + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> lst = ['a', 'a', 'b'] + >>> ser = bpd.Series([1, 2, 3], index=lst) + >>> ser + a 1 + a 2 + b 3 + dtype: Int64 + >>> ser.groupby(level=0).size() + a 2 + b 1 + dtype: Int64 + + For DataFrameGroupBy: + + >>> data = [[1, 2, 3], [1, 5, 6], [7, 8, 9]] + >>> df = bpd.DataFrame(data, columns=["a", "b", "c"], + ... index=["owl", "toucan", "eagle"]) + >>> df + a b c + owl 1 2 3 + toucan 1 5 6 + eagle 7 8 9 + [3 rows x 3 columns] + >>> df.groupby("a").size() + a + 1 2 + 7 1 + dtype: Int64 Returns: bigframes.pandas.DataFrame or bigframes.pandas.Series: