From 0e1bbfc1055aff9757b5138907c11caab2f3965a Mon Sep 17 00:00:00 2001 From: Shobhit Singh Date: Tue, 26 Dec 2023 19:10:15 +0000 Subject: [PATCH] docs: code samples for `Series.{add, replace, unique, T, transpose}` (#287) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://togithub.com/googleapis/python-bigquery-dataframes/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [x] Appropriate docs were updated (if necessary) - `Series.add`: https://screenshot.googleplex.com/763p36yJKKvM5DY - `Series.replace`: https://screenshot.googleplex.com/9MHuQZnKakqjjJn - `Series.unique`: https://screenshot.googleplex.com/7BfuQE3bFcYASqu - `Series.T`: https://screenshot.googleplex.com/8cSYpwKXrYetsEg - `Series.transpose`: https://screenshot.googleplex.com/7mM2zBwxRiqfDUV Fixes internal issue 317297573 🦕 --- .../bigframes_vendored/pandas/core/frame.py | 4 +- .../bigframes_vendored/pandas/core/series.py | 190 +++++++++++++++++- 2 files changed, 191 insertions(+), 3 deletions(-) diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py index 2de63b9103..d7ecae102b 100644 --- a/third_party/bigframes_vendored/pandas/core/frame.py +++ b/third_party/bigframes_vendored/pandas/core/frame.py @@ -4371,7 +4371,7 @@ def stack(self, level=-1): BigQuery DataFrames does not support stack operations that would combine columns of different dtypes. 
- **Example:** + **Examples:** >>> import bigframes.pandas as bpd >>> bpd.options.display.progress_bar = None @@ -4410,7 +4410,7 @@ def unstack(self, level=-1): If the index is not a MultiIndex, the output will be a Series (the analogue of stack when the columns are not a MultiIndex). - **Example:** + **Examples:** >>> import bigframes.pandas as bpd >>> bpd.options.display.progress_bar = None diff --git a/third_party/bigframes_vendored/pandas/core/series.py b/third_party/bigframes_vendored/pandas/core/series.py index cbe0963051..b0a4cb8193 100644 --- a/third_party/bigframes_vendored/pandas/core/series.py +++ b/third_party/bigframes_vendored/pandas/core/series.py @@ -143,13 +143,51 @@ def name(self) -> Hashable: @property def T(self) -> Series: - """Return the transpose, which is by definition self.""" + """Return the transpose, which is by definition self. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> s = bpd.Series(['Ant', 'Bear', 'Cow']) + >>> s + 0 Ant + 1 Bear + 2 Cow + dtype: string + + >>> s.T + 0 Ant + 1 Bear + 2 Cow + dtype: string + + """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) def transpose(self) -> Series: """ Return the transpose, which is by definition self. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> s = bpd.Series(['Ant', 'Bear', 'Cow']) + >>> s + 0 Ant + 1 Bear + 2 Cow + dtype: string + + >>> s.transpose() + 0 Ant + 1 Bear + 2 Cow + dtype: string + Returns: Series: Series. """ @@ -539,6 +577,36 @@ def nunique(self) -> int: """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + def unique(self) -> Series: + """ + Return unique values of Series object. + + Uniques are returned in order of appearance. Hash table-based unique, + therefore does NOT sort. 
+ + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> s = bpd.Series([2, 1, 3, 3], name='A') + >>> s + 0 2 + 1 1 + 2 3 + 3 3 + Name: A, dtype: Int64 + >>> s.unique() + 0 2 + 1 1 + 2 3 + Name: A, dtype: Int64 + + Returns: + Series: The unique values returned as a Series. + """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + def mode(self) -> Series: """ Return the mode(s) of the Series. @@ -1405,6 +1473,77 @@ def replace( This differs from updating with ``.loc`` or ``.iloc``, which require you to specify a location to update with some value. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> s = bpd.Series([1, 2, 3, 4, 5]) + >>> s + 0 1 + 1 2 + 2 3 + 3 4 + 4 5 + dtype: Int64 + + >>> s.replace(1, 5) + 0 5 + 1 2 + 2 3 + 3 4 + 4 5 + dtype: Int64 + + You can replace a list of values: + + >>> s.replace([1, 3, 5], -1) + 0 -1 + 1 2 + 2 -1 + 3 4 + 4 -1 + dtype: Int64 + + You can use a replacement mapping: + + >>> s.replace({1: 5, 3: 10}) + 0 5 + 1 2 + 2 10 + 3 4 + 4 5 + dtype: Int64 + + With a string Series you can use a simple string replacement or a regex + replacement: + + >>> s = bpd.Series(["Hello", "Another Hello"]) + >>> s.replace("Hello", "Hi") + 0 Hi + 1 Another Hello + dtype: string + + >>> s.replace("Hello", "Hi", regex=True) + 0 Hi + 1 Another Hi + dtype: string + + >>> s.replace("^Hello", "Hi", regex=True) + 0 Hi + 1 Another Hello + dtype: string + + >>> s.replace("Hello$", "Hi", regex=True) + 0 Hi + 1 Another Hi + dtype: string + + >>> s.replace("[Hh]e", "__", regex=True) + 0 __llo + 1 Anot__r __llo + dtype: string + Args: to_replace (str, regex, list, int, float or None): How to find the values that will be replaced. @@ -1702,6 +1841,55 @@ def add(self, other) -> Series: Equivalent to ``series + other``, but with support to substitute a fill_value for missing data in either one of the inputs. 
+ **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> a = bpd.Series([1, 2, 3, bpd.NA]) + >>> a + 0 1.0 + 1 2.0 + 2 3.0 + 3 <NA> + dtype: Float64 + + >>> b = bpd.Series([10, 20, 30, 40]) + >>> b + 0 10 + 1 20 + 2 30 + 3 40 + dtype: Int64 + + >>> a.add(b) + 0 11.0 + 1 22.0 + 2 33.0 + 3 <NA> + dtype: Float64 + + You can also use the mathematical operator ``+``: + + >>> a + b + 0 11.0 + 1 22.0 + 2 33.0 + 3 <NA> + dtype: Float64 + + Adding two Series with explicit indexes: + + >>> a = bpd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd']) + >>> b = bpd.Series([10, 20, 30, 40], index=['a', 'b', 'd', 'e']) + >>> a.add(b) + a 11 + b 22 + c <NA> + d 34 + e <NA> + dtype: Int64 + Args: other (Series, or scalar value):