From 245a89c36544faf2bcecb5735abbc00c0b4dd687 Mon Sep 17 00:00:00 2001
From: jialuoo
Date: Wed, 23 Oct 2024 14:44:55 -0700
Subject: [PATCH] feat: support series items method (#1089)

* feat: series items method

* fix lint format

* use to_pandas_batches

* fix doctest
---
 bigframes/series.py                          |  8 +++++
 tests/system/small/test_series.py            | 24 +++++++++++++
 .../bigframes_vendored/pandas/core/series.py | 36 +++++++++++++++++++
 3 files changed, 68 insertions(+)

diff --git a/bigframes/series.py b/bigframes/series.py
index 1a913f18d7..215f4473ee 100644
--- a/bigframes/series.py
+++ b/bigframes/series.py
@@ -1117,6 +1117,14 @@ def ne(self, other: object) -> Series:
         # TODO: enforce stricter alignment
         return self._apply_binary_op(other, ops.ne_op)
 
+    def items(self):
+        for batch_df in self._block.to_pandas_batches():
+            assert (
+                batch_df.shape[1] == 1
+            ), f"Expected 1 column in the dataframe, but got {batch_df.shape[1]}."
+            for item in batch_df.squeeze(axis=1).items():
+                yield item
+
     def where(self, cond, other=None):
         value_id, cond_id, other_id, block = self._align3(cond, other)
         block, result_id = block.project_expr(
diff --git a/tests/system/small/test_series.py b/tests/system/small/test_series.py
index 6cdb74fcf7..c29f91bc5c 100644
--- a/tests/system/small/test_series.py
+++ b/tests/system/small/test_series.py
@@ -227,6 +227,30 @@ def test_series_construct_geodata():
     )
 
 
+@pytest.mark.parametrize(
+    ["data", "index"],
+    [
+        (["a", "b", "c"], None),
+        ([1, 2, 3], ["a", "b", "c"]),
+        ([1, 2, None], ["a", "b", "c"]),
+        ([1, 2, 3], [pd.NA, "b", "c"]),
+        ([numpy.nan, 2, 3], ["a", "b", "c"]),
+    ],
+)
+def test_series_items(data, index):
+    bf_series = series.Series(data, index=index)
+    pd_series = pd.Series(data, index=index)
+
+    for (bf_index, bf_value), (pd_index, pd_value) in zip(
+        bf_series.items(), pd_series.items()
+    ):
+        # TODO(jialuo): Remove the if conditions after b/373699458 is addressed.
+        if not pd.isna(bf_index) or not pd.isna(pd_index):
+            assert bf_index == pd_index
+        if not pd.isna(bf_value) or not pd.isna(pd_value):
+            assert bf_value == pd_value
+
+
 @pytest.mark.parametrize(
     ["col_name", "expected_dtype"],
     [
diff --git a/third_party/bigframes_vendored/pandas/core/series.py b/third_party/bigframes_vendored/pandas/core/series.py
index a6363e3285..845d623e2a 100644
--- a/third_party/bigframes_vendored/pandas/core/series.py
+++ b/third_party/bigframes_vendored/pandas/core/series.py
@@ -3332,6 +3332,42 @@ def kurt(self):
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
 
+    def items(self):
+        """
+        Iterate over (index, value) pairs of a Series.
+
+        Iterates over the Series contents, returning a tuple with
+        the index and the value of a Series.
+
+        **Examples:**
+
+            >>> import bigframes.pandas as bpd
+            >>> bpd.options.display.progress_bar = None
+
+            >>> s = bpd.Series(['bear', 'bear', 'marsupial'],
+            ...                index=['panda', 'polar', 'koala'])
+            >>> s
+            panda         bear
+            polar         bear
+            koala    marsupial
+            dtype: string
+
+            >>> for index, value in s.items():
+            ...     print(f'--> index: {index}')
+            ...     print(f'--> value: {value}')
+            ...
+            --> index: panda
+            --> value: bear
+            --> index: polar
+            --> value: bear
+            --> index: koala
+            --> value: marsupial
+
+        Returns:
+            Iterator: Iterator of index, value for each content of the Series.
+        """
+        raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
+
     def where(self, cond, other):
         """Replace values where the condition is False.
 