From 0e2f16f6ab21f196a9c03b541e39398af22cccfe Mon Sep 17 00:00:00 2001 From: jialuo Date: Wed, 16 Oct 2024 19:17:28 +0000 Subject: [PATCH 1/4] feat: series items method --- bigframes/series.py | 9 +++++ tests/system/small/test_series.py | 20 +++++++++++ .../bigframes_vendored/pandas/core/series.py | 36 +++++++++++++++++++ 3 files changed, 65 insertions(+) diff --git a/bigframes/series.py b/bigframes/series.py index 1a913f18d7..8a08e6692e 100644 --- a/bigframes/series.py +++ b/bigframes/series.py @@ -1117,6 +1117,15 @@ def ne(self, other: object) -> Series: # TODO: enforce stricter alignment return self._apply_binary_op(other, ops.ne_op) + def items(self): + column_ids = self._block.value_columns + assert len(column_ids) == 1, ( + f"Expected lenght of column ids to be 1, but got {len(column_ids)}." + ) + bpd_series = bigframes.series.Series(self._block.select_column(column_ids[0])) + for index, value in zip(bpd_series.index, bpd_series.values): + yield index, value + def where(self, cond, other=None): value_id, cond_id, other_id, block = self._align3(cond, other) block, result_id = block.project_expr( diff --git a/tests/system/small/test_series.py b/tests/system/small/test_series.py index f1c60664a1..ebed680ed5 100644 --- a/tests/system/small/test_series.py +++ b/tests/system/small/test_series.py @@ -212,6 +212,26 @@ def test_series_construct_from_list_escaped_strings(): pd.testing.assert_series_equal(bf_result.to_pandas(), pd_result) +@pytest.mark.parametrize( + ["data", "index"], + [ + (["a", "b", "c"], None), + ([1, 2, 3], ["a", "b", "c"]), + ([1, 2, None], ["a", "b", "c"]), + ([1, 2, 3], [pd.NA, "b", "c"]), + ([numpy.nan, 2, 3], ["a", "b", "c"]), + ], +) +def test_series_items(data, index): + bf_series = series.Series(data, index=index) + pd_series = pd.Series(data, index=index) + + for (bf_index, bf_value), (pd_index, pd_value) in zip(bf_series.items(), pd_series.items()): + # TODO(jialuo): Remove the if conditions after b/373699458 is addressed. + if not pd.isna(bf_index) or not pd.isna(pd_index): + assert bf_index == pd_index + if not pd.isna(bf_value) or not pd.isna(pd_value): + assert bf_value == pd_value @pytest.mark.parametrize( ["col_name", "expected_dtype"], diff --git a/third_party/bigframes_vendored/pandas/core/series.py b/third_party/bigframes_vendored/pandas/core/series.py index a6363e3285..f3efafa885 100644 --- a/third_party/bigframes_vendored/pandas/core/series.py +++ b/third_party/bigframes_vendored/pandas/core/series.py @@ -3332,6 +3332,42 @@ def kurt(self): """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + def items(self): + """ + Iterate over (index, value) pairs of a Series. + + Iterates over the Series contents, returning a tuple with + the index and the value of a Series. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> df = bpd.Series(['bear', 'bear', 'marsupial'], + ... index=['panda', 'polar', 'koala']) + >>> df + panda bear + polar bear + koala marsupial + dtype: string + + >>> for index, value in df.items(): + ... print(f'--> index: {index}') + ... print(f'--> value: {value}') + ... + --> index: panda + --> value: bear + --> index: polar + --> value: bear + --> index: kaola + --> value: marsupial + + Returns: + Iterator: Iterator of index, value for each content of the Series. + """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + def where(self, cond, other): """Replace values where the condition is False. From 762fc3be816e373c690a2e6a4185e8a99722e214 Mon Sep 17 00:00:00 2001 From: jialuo Date: Thu, 17 Oct 2024 00:36:09 +0000 Subject: [PATCH 2/4] fix lint format --- bigframes/series.py | 6 +++--- tests/system/small/test_series.py | 6 +++++- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/bigframes/series.py b/bigframes/series.py index 8a08e6692e..9b1e93d13e 100644 --- a/bigframes/series.py +++ b/bigframes/series.py @@ -1119,9 +1119,9 @@ def ne(self, other: object) -> Series: def items(self): column_ids = self._block.value_columns - assert len(column_ids) == 1, ( - f"Expected lenght of column ids to be 1, but got {len(column_ids)}." - ) + assert ( + len(column_ids) == 1 + ), f"Expected lenght of column ids to be 1, but got {len(column_ids)}." bpd_series = bigframes.series.Series(self._block.select_column(column_ids[0])) for index, value in zip(bpd_series.index, bpd_series.values): yield index, value diff --git a/tests/system/small/test_series.py b/tests/system/small/test_series.py index ebed680ed5..d7e4f0f112 100644 --- a/tests/system/small/test_series.py +++ b/tests/system/small/test_series.py @@ -212,6 +212,7 @@ def test_series_construct_from_list_escaped_strings(): pd.testing.assert_series_equal(bf_result.to_pandas(), pd_result) + @pytest.mark.parametrize( ["data", "index"], [ @@ -226,13 +227,16 @@ def test_series_items(data, index): bf_series = series.Series(data, index=index) pd_series = pd.Series(data, index=index) - for (bf_index, bf_value), (pd_index, pd_value) in zip(bf_series.items(), pd_series.items()): + for (bf_index, bf_value), (pd_index, pd_value) in zip( + bf_series.items(), pd_series.items() + ): # TODO(jialuo): Remove the if conditions after b/373699458 is addressed. if not pd.isna(bf_index) or not pd.isna(pd_index): assert bf_index == pd_index if not pd.isna(bf_value) or not pd.isna(pd_value): assert bf_value == pd_value + @pytest.mark.parametrize( ["col_name", "expected_dtype"], [ From c05e32b4357fe23debace531a8cdf0eeb99f57c9 Mon Sep 17 00:00:00 2001 From: jialuo Date: Thu, 17 Oct 2024 22:16:35 +0000 Subject: [PATCH 3/4] use to_pandas_batches --- bigframes/series.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/bigframes/series.py b/bigframes/series.py index 9b1e93d13e..215f4473ee 100644 --- a/bigframes/series.py +++ b/bigframes/series.py @@ -1118,13 +1118,12 @@ def ne(self, other: object) -> Series: return self._apply_binary_op(other, ops.ne_op) def items(self): - column_ids = self._block.value_columns - assert ( - len(column_ids) == 1 - ), f"Expected lenght of column ids to be 1, but got {len(column_ids)}." - bpd_series = bigframes.series.Series(self._block.select_column(column_ids[0])) - for index, value in zip(bpd_series.index, bpd_series.values): - yield index, value + for batch_df in self._block.to_pandas_batches(): + assert ( + batch_df.shape[1] == 1 + ), f"Expected 1 column in the dataframe, but got {batch_df.shape[1]}." + for item in batch_df.squeeze(axis=1).items(): + yield item def where(self, cond, other=None): value_id, cond_id, other_id, block = self._align3(cond, other) From b85e0ca78fc7d58566219c7820688434d02ec276 Mon Sep 17 00:00:00 2001 From: jialuo Date: Fri, 18 Oct 2024 19:06:54 +0000 Subject: [PATCH 4/4] fix doctest --- third_party/bigframes_vendored/pandas/core/series.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/third_party/bigframes_vendored/pandas/core/series.py b/third_party/bigframes_vendored/pandas/core/series.py index f3efafa885..845d623e2a 100644 --- a/third_party/bigframes_vendored/pandas/core/series.py +++ b/third_party/bigframes_vendored/pandas/core/series.py @@ -3344,15 +3344,15 @@ def items(self): >>> import bigframes.pandas as bpd >>> bpd.options.display.progress_bar = None - >>> df = bpd.Series(['bear', 'bear', 'marsupial'], + >>> s = bpd.Series(['bear', 'bear', 'marsupial'], ... index=['panda', 'polar', 'koala']) - >>> df + >>> s panda bear polar bear koala marsupial dtype: string - >>> for index, value in df.items(): + >>> for index, value in s.items(): ... print(f'--> index: {index}') ... print(f'--> value: {value}') ... @@ -3360,7 +3360,7 @@ def items(self): --> value: bear --> index: polar --> value: bear - --> index: kaola + --> index: koala --> value: marsupial Returns: