From 8c6f9fc3a201c2783cdae2ec45df51e1218f75d9 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Mon, 1 Mar 2021 14:45:39 -0800 Subject: [PATCH 1/3] add isin doc examples --- python/cudf/cudf/core/dataframe.py | 37 +++++++++++++++++++++++++++++ python/cudf/cudf/core/frame.py | 1 + python/cudf/cudf/core/multiindex.py | 31 ++++++++++++++++++++++++ python/cudf/cudf/core/series.py | 35 +++++++++++++++++++++++++++ 4 files changed, 104 insertions(+) diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 8bdb36fc27d..6b8b16b7118 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -5713,6 +5713,43 @@ def isin(self, values): DataFrame: DataFrame of booleans showing whether each element in the DataFrame is contained in values. + + Examples + -------- + >>> import cudf + >>> df = cudf.DataFrame({'num_legs': [2, 4], 'num_wings': [2, 0]}, + ... index=['falcon', 'dog']) + >>> df + num_legs num_wings + falcon 2 2 + dog 4 0 + + When ``values`` is a list, check whether every value in the DataFrame + is present in the list (which animals have 0 or 2 legs or wings): + + >>> df.isin([0, 2]) + num_legs num_wings + falcon True True + dog False True + + When ``values`` is a dict, we can pass values to check for each + column separately: + + >>> df.isin({'num_wings': [0, 3]}) + num_legs num_wings + falcon False False + dog False True + + When ``values`` is a Series or DataFrame the index and column must + match. Note that 'dog' is absent from the index of other, so every + value in its row is False. + + >>> other = cudf.DataFrame({'num_legs': [8, 2], 'num_wings': [0, 2]}, + ... 
index=['spider', 'falcon']) + >>> df.isin(other) + num_legs num_wings + falcon True True + dog False False """ if isinstance(values, dict): diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index dedefeaf9a2..b5946653c77 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -239,6 +239,7 @@ def copy(self: T, deep: bool = True) -> T: ------- copy : Series or DataFrame Object type matches caller. + Examples -------- >>> s = cudf.Series([1, 2], index=["a", "b"]) diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py index 19c5b827d50..d009228ce11 100644 --- a/python/cudf/cudf/core/multiindex.py +++ b/python/cudf/cudf/core/multiindex.py @@ -655,16 +655,47 @@ def isin(self, values, level=None): level : str or int, optional Name or position of the index level to use (if the index is a MultiIndex). + Returns ------- is_contained : cupy array CuPy array of boolean values. + Notes ------- When `level` is None, `values` can only be MultiIndex, or a set/list-like tuples. When `level` is provided, `values` can be Index or MultiIndex, or a set/list-like tuples. + + Examples + -------- + >>> idx = cudf.Index([1,2,3]) + >>> idx + Int64Index([1, 2, 3], dtype='int64') + + Check whether each index value in a list of values. + >>> idx.isin([1, 4]) + array([ True, False, False]) + + >>> midx = cudf.form_pandas(pd.MultiIndex.from_arrays([[1,2,3], + ... ['red', 'blue', 'green']], + ... names=('number', 'color'))) + >>> midx + MultiIndex([(1, 'red'), + (2, 'blue'), + (3, 'green')], + names=['number', 'color']) + + Check whether the strings in the 'color' level of the MultiIndex + are in a list of colors. 
+ + >>> midx.isin(['red', 'orange', 'yellow'], level='color') + array([ True, False, False]) + + To check across the levels of a MultiIndex, pass a list of tuples: + >>> midx.isin([(1, 'red'), (3, 'red')]) + array([ True, False, False]) """ from cudf.utils.dtypes import is_list_like diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index be03fb147ff..abce4f7cfca 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -3775,6 +3775,41 @@ def isin(self, values): ------- TypeError If values is a string + + Examples + -------- + >>> import cudf + >>> s = cudf.Series(['lama', 'cow', 'lama', 'beetle', 'lama', + ... 'hippo'], name='animal') + >>> s.isin(['cow', 'lama']) + 0 True + 1 True + 2 True + 3 False + 4 True + 5 False + Name: animal, dtype: bool + + Passing a single string as ``s.isin('lama')`` will raise an error. Use + a list of one element instead: + + >>> s.isin(['lama']) + 0 True + 1 False + 2 True + 3 False + 4 True + 5 False + Name: animal, dtype: bool + + Strings and integers are distinct and are therefore not comparable: + + >>> cudf.Series([1]).isin(['1']) + 0 False + dtype: bool + >>> cudf.Series([1.1]).isin(['1.1']) + 0 False + dtype: bool """ if is_scalar(values): From e639625924a234f4a3aec58f14d830e96da99d7b Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Mon, 1 Mar 2021 14:53:38 -0800 Subject: [PATCH 2/3] update docs --- python/cudf/cudf/core/index.py | 10 ++++++++++ python/cudf/cudf/core/multiindex.py | 11 +++-------- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index 88f3f8c4c89..b10ed5aa8cb 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -1362,6 +1362,16 @@ def isin(self, values): is_contained : cupy array CuPy array of boolean values. 
+ Examples + -------- + >>> idx = cudf.Index([1,2,3]) + >>> idx + Int64Index([1, 2, 3], dtype='int64') + + Check whether each index value is in a list of values. + + >>> idx.isin([1, 4]) + array([ True, False, False]) """ result = self.to_series().isin(values).values diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py index d009228ce11..922c45d71dd 100644 --- a/python/cudf/cudf/core/multiindex.py +++ b/python/cudf/cudf/core/multiindex.py @@ -670,14 +670,8 @@ def isin(self, values, level=None): Examples -------- - >>> idx = cudf.Index([1,2,3]) - >>> idx - Int64Index([1, 2, 3], dtype='int64') - - Check whether each index value in a list of values. - >>> idx.isin([1, 4]) - array([ True, False, False]) - + >>> import cudf + >>> import pandas as pd >>> midx = cudf.form_pandas(pd.MultiIndex.from_arrays([[1,2,3], ... ['red', 'blue', 'green']], ... names=('number', 'color'))) @@ -694,6 +688,7 @@ def isin(self, values, level=None): array([ True, False, False]) To check across the levels of a MultiIndex, pass a list of tuples: + >>> midx.isin([(1, 'red'), (3, 'red')]) array([ True, False, False]) """ From c42750b8af2f390922b2aa0446a4ecef5191f712 Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Mon, 1 Mar 2021 19:42:17 -0600 Subject: [PATCH 3/3] Update python/cudf/cudf/core/multiindex.py Co-authored-by: Keith Kraus --- python/cudf/cudf/core/multiindex.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py index 922c45d71dd..82e89bb00f4 100644 --- a/python/cudf/cudf/core/multiindex.py +++ b/python/cudf/cudf/core/multiindex.py @@ -672,7 +672,7 @@ def isin(self, values, level=None): -------- >>> import cudf >>> import pandas as pd - >>> midx = cudf.form_pandas(pd.MultiIndex.from_arrays([[1,2,3], + >>> midx = cudf.from_pandas(pd.MultiIndex.from_arrays([[1,2,3], ... ['red', 'blue', 'green']], ... names=('number', 'color'))) >>> midx