From a3ff76a200a5599c569404ada74c85dad3de37fe Mon Sep 17 00:00:00 2001 From: Shobhit Singh Date: Fri, 22 Dec 2023 02:45:36 +0000 Subject: [PATCH] docs: code samples for `reset_index` and `sort_values` (#282) * docs: code samples for `reset_index` and `sort_values` * fix alignment in dataframe api code samples --- .../bigframes_vendored/pandas/core/frame.py | 161 ++++++++++++++++++ .../bigframes_vendored/pandas/core/series.py | 110 ++++++++++++ 2 files changed, 271 insertions(+) diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py index 427e586c52..fb34193710 100644 --- a/third_party/bigframes_vendored/pandas/core/frame.py +++ b/third_party/bigframes_vendored/pandas/core/frame.py @@ -1138,6 +1138,93 @@ def reset_index( Reset the index of the DataFrame, and use the default one instead. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> import numpy as np + >>> df = bpd.DataFrame([('bird', 389.0), + ... ('bird', 24.0), + ... ('mammal', 80.5), + ... ('mammal', np.nan)], + ... index=['falcon', 'parrot', 'lion', 'monkey'], + ... columns=('class', 'max_speed')) + >>> df + class max_speed + falcon bird 389.0 + parrot bird 24.0 + lion mammal 80.5 + monkey mammal + + [4 rows x 2 columns] + + When we reset the index, the old index is added as a column, and a new sequential index is used: + + >>> df.reset_index() + index class max_speed + 0 falcon bird 389.0 + 1 parrot bird 24.0 + 2 lion mammal 80.5 + 3 monkey mammal + + [4 rows x 3 columns] + + We can use the ``drop`` parameter to avoid the old index being added as a column: + + >>> df.reset_index(drop=True) + class max_speed + 0 bird 389.0 + 1 bird 24.0 + 2 mammal 80.5 + 3 mammal + + [4 rows x 2 columns] + + You can also use ``reset_index`` with ``MultiIndex``. + + >>> import pandas as pd + >>> index = pd.MultiIndex.from_tuples([('bird', 'falcon'), + ... ('bird', 'parrot'), + ... 
('mammal', 'lion'), + ... ('mammal', 'monkey')], + ... names=['class', 'name']) + >>> columns = ['speed', 'max'] + >>> df = bpd.DataFrame([(389.0, 'fly'), + ... (24.0, 'fly'), + ... (80.5, 'run'), + ... (np.nan, 'jump')], + ... index=index, + ... columns=columns) + >>> df + speed max + class name + bird falcon 389.0 fly + parrot 24.0 fly + mammal lion 80.5 run + monkey jump + + [4 rows x 2 columns] + + >>> df.reset_index() + class name speed max + 0 bird falcon 389.0 fly + 1 bird parrot 24.0 fly + 2 mammal lion 80.5 run + 3 mammal monkey jump + + [4 rows x 4 columns] + + >>> df.reset_index(drop=True) + speed max + 0 389.0 fly + 1 24.0 fly + 2 80.5 run + 3 jump + + [4 rows x 2 columns] + + Args: drop (bool, default False): Do not try to insert index into dataframe columns. This resets @@ -1347,6 +1434,80 @@ def sort_values( ) -> DataFrame: """Sort by the values along row axis. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> df = bpd.DataFrame({ + ... 'col1': ['A', 'A', 'B', bpd.NA, 'D', 'C'], + ... 'col2': [2, 1, 9, 8, 7, 4], + ... 'col3': [0, 1, 9, 4, 2, 3], + ... 'col4': ['a', 'B', 'c', 'D', 'e', 'F'] + ... 
}) + >>> df + col1 col2 col3 col4 + 0 A 2 0 a + 1 A 1 1 B + 2 B 9 9 c + 3 8 4 D + 4 D 7 2 e + 5 C 4 3 F + + [6 rows x 4 columns] + + Sort by col1: + + >>> df.sort_values(by=['col1']) + col1 col2 col3 col4 + 0 A 2 0 a + 1 A 1 1 B + 2 B 9 9 c + 5 C 4 3 F + 4 D 7 2 e + 3 8 4 D + + [6 rows x 4 columns] + + Sort by multiple columns: + + >>> df.sort_values(by=['col1', 'col2']) + col1 col2 col3 col4 + 1 A 1 1 B + 0 A 2 0 a + 2 B 9 9 c + 5 C 4 3 F + 4 D 7 2 e + 3 8 4 D + + [6 rows x 4 columns] + + Sort Descending: + + >>> df.sort_values(by='col1', ascending=False) + col1 col2 col3 col4 + 4 D 7 2 e + 5 C 4 3 F + 2 B 9 9 c + 0 A 2 0 a + 1 A 1 1 B + 3 8 4 D + + [6 rows x 4 columns] + + Putting NAs first: + + >>> df.sort_values(by='col1', ascending=False, na_position='first') + col1 col2 col3 col4 + 3 8 4 D + 4 D 7 2 e + 5 C 4 3 F + 2 B 9 9 c + 0 A 2 0 a + 1 A 1 1 B + + [6 rows x 4 columns] + Args: by (str or Sequence[str]): Name or list of names to sort by. diff --git a/third_party/bigframes_vendored/pandas/core/series.py b/third_party/bigframes_vendored/pandas/core/series.py index 01cc3a0500..778ad68e0e 100644 --- a/third_party/bigframes_vendored/pandas/core/series.py +++ b/third_party/bigframes_vendored/pandas/core/series.py @@ -168,6 +168,53 @@ def reset_index( when the index is meaningless and needs to be reset to the default before another operation. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> s = bpd.Series([1, 2, 3, 4], name='foo', + ... index=['a', 'b', 'c', 'd']) + >>> s.index.name = "idx" + >>> s + idx + a 1 + b 2 + c 3 + d 4 + Name: foo, dtype: Int64 + + Generate a DataFrame with default index. + + >>> s.reset_index() + idx foo + 0 a 1 + 1 b 2 + 2 c 3 + 3 d 4 + + [4 rows x 2 columns] + + To specify the name of the new column use ``name`` param. 
+ + >>> s.reset_index(name="bar") + idx bar + 0 a 1 + 1 b 2 + 2 c 3 + 3 d 4 + + [4 rows x 2 columns] + + To generate a new Series with the default index, set param ``drop=True``. + + >>> s.reset_index(drop=True) + 0 1 + 1 2 + 2 3 + 3 4 + Name: foo, dtype: Int64 + Args: drop (bool, default False): Just reset the index, without inserting it as a column in @@ -699,6 +746,69 @@ def sort_values( Sort a Series in ascending or descending order by some criterion. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> s = bpd.Series([np.nan, 1, 3, 10, 5]) + >>> s + 0 + 1 1.0 + 2 3.0 + 3 10.0 + 4 5.0 + dtype: Float64 + + Sort values in ascending order (default behaviour): + + >>> s.sort_values(ascending=True) + 1 1.0 + 2 3.0 + 4 5.0 + 3 10.0 + 0 + dtype: Float64 + + Sort values in descending order: + + >>> s.sort_values(ascending=False) + 3 10.0 + 4 5.0 + 2 3.0 + 1 1.0 + 0 + dtype: Float64 + + Sort values putting NAs first: + + >>> s.sort_values(na_position='first') + 0 + 1 1.0 + 2 3.0 + 4 5.0 + 3 10.0 + dtype: Float64 + + Sort a series of strings: + + >>> s = bpd.Series(['z', 'b', 'd', 'a', 'c']) + >>> s + 0 z + 1 b + 2 d + 3 a + 4 c + dtype: string + + >>> s.sort_values() + 3 a + 1 b + 4 c + 2 d + 0 z + dtype: string + Args: axis (0 or 'index'): Unused. Parameter needed for compatibility with DataFrame.