From d008275adecfca8838d209df66b2c94991ae7d75 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Tue, 17 Aug 2021 20:28:59 -0700 Subject: [PATCH 1/8] Initial --- python/cudf/cudf/core/dataframe.py | 114 --------------- python/cudf/cudf/core/frame.py | 225 +++++++++++++++++++++++++++++ python/cudf/cudf/core/series.py | 194 ------------------------- 3 files changed, 225 insertions(+), 308 deletions(-) diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index a78c24c21b9..bd01f52f758 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -35,7 +35,6 @@ from cudf.core.index import BaseIndex, RangeIndex, as_index from cudf.core.indexing import _DataFrameIlocIndexer, _DataFrameLocIndexer from cudf.core.series import Series -from cudf.core.window import Rolling from cudf.utils import applyutils, docutils, ioutils, queryutils, utils from cudf.utils.docutils import copy_docstring from cudf.utils.dtypes import ( @@ -1026,68 +1025,6 @@ def assign(self, **kwargs): new[k] = v return new - def head(self, n=5): - """ - Returns the first n rows as a new DataFrame - - Examples - -------- - >>> import cudf - >>> df = cudf.DataFrame() - >>> df['key'] = [0, 1, 2, 3, 4] - >>> df['val'] = [float(i + 10) for i in range(5)] # insert column - >>> df.head(2) - key val - 0 0 10.0 - 1 1 11.0 - """ - return self.iloc[:n] - - def tail(self, n=5): - """ - Returns the last n rows as a new DataFrame - - Examples - -------- - >>> import cudf - >>> df = cudf.DataFrame() - >>> df['key'] = [0, 1, 2, 3, 4] - >>> df['val'] = [float(i + 10) for i in range(5)] # insert column - >>> df.tail(2) - key val - 3 3 13.0 - 4 4 14.0 - """ - if n == 0: - return self.iloc[0:0] - - return self.iloc[-n:] - - def to_string(self): - """ - Convert to string - - cuDF uses Pandas internals for efficient string formatting. - Set formatting options using pandas string formatting options and - cuDF objects will print identically to Pandas objects. 
- - cuDF supports `null/None` as a value in any column type, which - is transparently supported during this output process. - - Examples - -------- - >>> import cudf - >>> df = cudf.DataFrame() - >>> df['key'] = [0, 1, 2] - >>> df['val'] = [float(i + 10) for i in range(3)] - >>> df.to_string() - ' key val\\n0 0 10.0\\n1 1 11.0\\n2 2 12.0' - """ - return self.__repr__() - - def __str__(self): - return self.to_string() - def astype(self, dtype, copy=False, errors="raise", **kwargs): """ Cast the DataFrame to the given dtype @@ -1641,14 +1578,6 @@ def update( self._mimic_inplace(source_df, inplace=True) - def __invert__(self): - # Defer logic to Series since pandas semantics dictate different - # behaviors for different types that requires too much special casing - # of the standard _unaryop. - return DataFrame( - data={col: ~self[col] for col in self}, index=self.index - ) - def radd(self, other, axis=1, level=None, fill_value=None): """ Get Addition of dataframe and other, element-wise (binary @@ -3502,15 +3431,6 @@ def rename( else: return out.copy(deep=copy) - def nans_to_nulls(self): - """ - Convert nans (if any) to nulls. - """ - df = self.copy() - for col in df.columns: - df[col] = df[col].nans_to_nulls() - return df - def as_gpu_matrix(self, columns=None, order="F"): """Convert to a matrix in device memory. @@ -4503,19 +4423,6 @@ def groupby( sort=sort, ) - @copy_docstring(Rolling) - def rolling( - self, window, min_periods=None, center=False, axis=0, win_type=None - ): - return Rolling( - self, - window, - min_periods=min_periods, - center=center, - axis=axis, - win_type=win_type, - ) - def query(self, expr, local_dict=None): """ Query with a boolean expression using Numba to compile a GPU kernel. 
@@ -6835,27 +6742,6 @@ def to_feather(self, path, *args, **kwargs): feather.to_feather(self, path, *args, **kwargs) - @ioutils.doc_to_json() - def to_json(self, path_or_buf=None, *args, **kwargs): - """{docstring}""" - from cudf.io import json as json - - return json.to_json(self, path_or_buf=path_or_buf, *args, **kwargs) - - @ioutils.doc_to_hdf() - def to_hdf(self, path_or_buf, key, *args, **kwargs): - """{docstring}""" - from cudf.io import hdf as hdf - - hdf.to_hdf(path_or_buf, key, self, *args, **kwargs) - - @ioutils.doc_to_dlpack() - def to_dlpack(self): - """{docstring}""" - from cudf.io import dlpack as dlpack - - return dlpack.to_dlpack(self) - @ioutils.doc_dataframe_to_csv() def to_csv( self, diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index 5f1ac4e0c20..26105c83691 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -27,6 +27,9 @@ ) from cudf.core.column_accessor import ColumnAccessor from cudf.core.join import merge +from cudf.core.window import Rolling +from cudf.utils import ioutils +from cudf.utils.docutils import copy_docstring from cudf.utils.dtypes import ( _is_non_decimal_numeric_dtype, _is_scalar_or_zero_d_array, @@ -4405,6 +4408,216 @@ def cumprod(self, axis=None, skipna=True, *args, **kwargs): "prod", axis=axis, skipna=skipna, cast_to_int=True, *args, **kwargs ) + @ioutils.doc_to_json() + def to_json(self, path_or_buf=None, *args, **kwargs): + """{docstring}""" + + return cudf.io.json.to_json( + self, path_or_buf=path_or_buf, *args, **kwargs + ) + + @ioutils.doc_to_hdf() + def to_hdf(self, path_or_buf, key, *args, **kwargs): + """{docstring}""" + + cudf.io.hdf.to_hdf(path_or_buf, key, self, *args, **kwargs) + + @ioutils.doc_to_dlpack() + def to_dlpack(self): + """{docstring}""" + + return cudf.io.dlpack.to_dlpack(self) + + def to_string(self): + """ + Convert to string + + cuDF uses Pandas internals for efficient string formatting. 
+ Set formatting options using pandas string formatting options and + cuDF objects will print identically to Pandas objects. + + cuDF supports `null/None` as a value in any column type, which + is transparently supported during this output process. + + Examples + -------- + >>> import cudf + >>> df = cudf.DataFrame() + >>> df['key'] = [0, 1, 2] + >>> df['val'] = [float(i + 10) for i in range(3)] + >>> df.to_string() + ' key val\\n0 0 10.0\\n1 1 11.0\\n2 2 12.0' + """ + return self.__repr__() + + def __str__(self): + return self.to_string() + + def head(self, n=5): + """ + Return the first `n` rows. + This function returns the first `n` rows for the object based + on position. It is useful for quickly testing if your object + has the right type of data in it. + For negative values of `n`, this function returns all rows except + the last `n` rows, equivalent to ``df[:-n]``. + + Parameters + ---------- + n : int, default 5 + Number of rows to select. + + Returns + ------- + same type as caller + The first `n` rows of the caller object. + + See Also + -------- + Frame.tail: Returns the last `n` rows. + + Examples + -------- + >>> ser = cudf.Series(['alligator', 'bee', 'falcon', + ... 
'lion', 'monkey', 'parrot', 'shark', 'whale', 'zebra']) + >>> ser + 0 alligator + 1 bee + 2 falcon + 3 lion + 4 monkey + 5 parrot + 6 shark + 7 whale + 8 zebra + dtype: object + + Viewing the first 5 lines + + >>> ser.head() + 0 alligator + 1 bee + 2 falcon + 3 lion + 4 monkey + dtype: object + + Viewing the first `n` lines (three in this case) + + >>> ser.head(3) + 0 alligator + 1 bee + 2 falcon + dtype: object + + For negative values of `n` + + >>> ser.head(-3) + 0 alligator + 1 bee + 2 falcon + 3 lion + 4 monkey + 5 parrot + dtype: object + + For Dataframe + + >>> df = cudf.DataFrame() + >>> df['key'] = [0, 1, 2, 3, 4] + >>> df['val'] = [float(i + 10) for i in range(5)] # insert column + >>> df.head(2) + key val + 0 0 10.0 + 1 1 11.0 + """ + return self.iloc[:n] + + def tail(self, n=5): + """ + Returns the last n rows as a new DataFrame or Series + + Examples + -------- + >>> import cudf + >>> df = cudf.DataFrame() + >>> df['key'] = [0, 1, 2, 3, 4] + >>> df['val'] = [float(i + 10) for i in range(5)] # insert column + >>> df.tail(2) + key val + 3 3 13.0 + 4 4 14.0 + + >>> import cudf + >>> ser = cudf.Series([4, 3, 2, 1, 0]) + >>> ser.tail(2) + 3 1 + 4 0 + """ + if n == 0: + return self.iloc[0:0] + + return self.iloc[-n:] + + @copy_docstring(Rolling) + def rolling( + self, window, min_periods=None, center=False, axis=0, win_type=None + ): + return Rolling( + self, + window, + min_periods=min_periods, + center=center, + axis=axis, + win_type=win_type, + ) + + def nans_to_nulls(self): + """ + Convert nans (if any) to nulls + + Returns + ------- + DataFrame or Series + + Examples + -------- + >>> import cudf + >>> import numpy as np + >>> series = cudf.Series([1, 2, np.nan, None, 10], nan_as_null=False) + >>> series + 0 1.0 + 1 2.0 + 2 NaN + 3 + 4 10.0 + dtype: float64 + >>> series.nans_to_nulls() + 0 1.0 + 1 2.0 + 2 + 3 + 4 10.0 + dtype: float64 + """ + return self._from_data( + { + name: col.copy().nans_to_nulls() + for name, col in self._data.items() + }, + 
self._index, + ) + + def __invert__(self): + """Bitwise invert (~) for integral dtypes, logical NOT for bools.""" + return self._from_data( + { + name: _apply_inverse_column(col) + for name, col in self._data.items() + }, + self._index, + ) + class SingleColumnFrame(Frame): """A one-dimensional frame. @@ -5133,3 +5346,15 @@ def _drop_rows_by_labels( return res else: return obj.join(key_df, how="leftanti") + + +def _apply_inverse_column(col: ColumnBase) -> ColumnBase: + """Bitwise invert (~) for integral dtypes, logical NOT for bools.""" + if np.issubdtype(col.dtype, np.integer): + return col.unary_operator("invert") + elif np.issubdtype(col.dtype, np.bool_): + return col.unary_operator("not") + else: + raise TypeError( + f"Operation `~` not supported on {col.dtype.type.__name__}" + ) diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index 1249e126ee9..05d9a958512 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -1094,124 +1094,6 @@ def take(self, indices, keep_index=True): {self.name: self._column.take(col_inds, keep_index=False)} ) - def head(self, n=5): - """ - Return the first `n` rows. - This function returns the first `n` rows for the object based - on position. It is useful for quickly testing if your object - has the right type of data in it. - For negative values of `n`, this function returns all rows except - the last `n` rows, equivalent to ``df[:-n]``. - - Parameters - ---------- - n : int, default 5 - Number of rows to select. - - Returns - ------- - same type as caller - The first `n` rows of the caller object. - - See Also - -------- - Series.tail: Returns the last `n` rows. - - Examples - -------- - >>> ser = cudf.Series(['alligator', 'bee', 'falcon', - ... 
'lion', 'monkey', 'parrot', 'shark', 'whale', 'zebra']) - >>> ser - 0 alligator - 1 bee - 2 falcon - 3 lion - 4 monkey - 5 parrot - 6 shark - 7 whale - 8 zebra - dtype: object - - Viewing the first 5 lines - - >>> ser.head() - 0 alligator - 1 bee - 2 falcon - 3 lion - 4 monkey - dtype: object - - Viewing the first `n` lines (three in this case) - - >>> ser.head(3) - 0 alligator - 1 bee - 2 falcon - dtype: object - - For negative values of `n` - - >>> ser.head(-3) - 0 alligator - 1 bee - 2 falcon - 3 lion - 4 monkey - 5 parrot - dtype: object - """ - return self.iloc[:n] - - def tail(self, n=5): - """ - Returns the last n rows as a new Series - - Examples - -------- - >>> import cudf - >>> ser = cudf.Series([4, 3, 2, 1, 0]) - >>> ser.tail(2) - 3 1 - 4 0 - """ - if n == 0: - return self.iloc[0:0] - - return self.iloc[-n:] - - def to_string(self): - """Convert to string - - Uses Pandas formatting internals to produce output identical to Pandas. - Use the Pandas formatting settings directly in Pandas to control cuDF - output. 
- - Returns - ------- - str - String representation of Series - - Examples - -------- - >>> import cudf - >>> series = cudf.Series(['a', None, 'b', 'c', None]) - >>> series - 0 a - 1 - 2 b - 3 c - 4 - dtype: object - >>> series.to_string() - '0 a\\n1 \\n2 b\\n3 c\\n4 \\ndtype: object' - """ # noqa : E501 - return self.__repr__() - - def __str__(self): - return self.to_string() - def __repr__(self): _, height = get_terminal_size() max_rows = ( @@ -2332,17 +2214,6 @@ def ge(self, other, fill_value=None, axis=0): other=other, fn="ge", fill_value=fill_value, can_reindex=True ) - def __invert__(self): - """Bitwise invert (~) for integral dtypes, logical NOT for bools.""" - if np.issubdtype(self.dtype, np.integer): - return self._unaryop("invert") - elif np.issubdtype(self.dtype, np.bool_): - return self._unaryop("not") - else: - raise TypeError( - f"Operation `~` not supported on {self.dtype.type.__name__}" - ) - @copy_docstring(CategoricalAccessor) # type: ignore @property def cat(self): @@ -2693,38 +2564,6 @@ def to_array(self, fillna=None): """ return self._column.to_array(fillna=fillna) - def nans_to_nulls(self): - """ - Convert nans (if any) to nulls - - Returns - ------- - Series - - Examples - -------- - >>> import cudf - >>> import numpy as np - >>> series = cudf.Series([1, 2, np.nan, None, 10], nan_as_null=False) - >>> series - 0 1.0 - 1 2.0 - 2 NaN - 3 - 4 10.0 - dtype: float64 - >>> series.nans_to_nulls() - 0 1.0 - 1 2.0 - 2 - 3 - 4 10.0 - dtype: float64 - """ - return self._from_data( - {self.name: self._column.nans_to_nulls()}, self._index - ) - def all(self, axis=0, bool_only=None, skipna=True, level=None, **kwargs): if bool_only not in (None, True): raise NotImplementedError( @@ -5028,39 +4867,6 @@ def groupby( self, by=by, level=level, dropna=dropna, sort=sort ) - @copy_docstring(Rolling) - def rolling( - self, window, min_periods=None, center=False, axis=0, win_type=None - ): - return Rolling( - self, - window, - min_periods=min_periods, - 
center=center, - axis=axis, - win_type=win_type, - ) - - @ioutils.doc_to_json() - def to_json(self, path_or_buf=None, *args, **kwargs): - """{docstring}""" - - return cudf.io.json.to_json( - self, path_or_buf=path_or_buf, *args, **kwargs - ) - - @ioutils.doc_to_hdf() - def to_hdf(self, path_or_buf, key, *args, **kwargs): - """{docstring}""" - - cudf.io.hdf.to_hdf(path_or_buf, key, self, *args, **kwargs) - - @ioutils.doc_to_dlpack() - def to_dlpack(self): - """{docstring}""" - - return cudf.io.dlpack.to_dlpack(self) - def rename(self, index=None, copy=True): """ Alter Series name From f8ec05964d715e639d1287bef0ec292a27b9e104 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Tue, 17 Aug 2021 21:19:23 -0700 Subject: [PATCH 2/8] make serialization function more alike --- python/cudf/cudf/core/dataframe.py | 8 +++++--- python/cudf/cudf/core/series.py | 9 +++++---- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index bd01f52f758..b085ca238b5 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -525,11 +525,13 @@ def serialize(self): # Use the column directly to avoid duplicating the index # need to pickle column names to handle numpy integer columns - header["column_names"] = pickle.dumps(tuple(self._data.names)) - column_header, column_frames = column.serialize_columns(self._columns) - header["columns"] = column_header + header["columns"], column_frames = column.serialize_columns( + self._columns + ) + header["column_frame_count"] = len(column_frames) frames.extend(column_frames) + header["column_names"] = pickle.dumps(tuple(self._data.names)) return header, frames @classmethod diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index 05d9a958512..1097bd51b09 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -329,15 +329,16 @@ def from_pandas(cls, s, nan_as_null=None): def 
serialize(self): header = {} frames = [] + header["type-serialized"] = pickle.dumps(type(self)) header["index"], index_frames = self._index.serialize() - header["name"] = pickle.dumps(self.name) - frames.extend(index_frames) header["index_frame_count"] = len(index_frames) + frames.extend(index_frames) + header["column"], column_frames = self._column.serialize() - header["type-serialized"] = pickle.dumps(type(self)) - frames.extend(column_frames) header["column_frame_count"] = len(column_frames) + frames.extend(column_frames) + header["name"] = pickle.dumps(self.name) return header, frames @property From 34247d4e95ed863b9baf7db96edb0bd72192246a Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Wed, 18 Aug 2021 14:00:44 -0700 Subject: [PATCH 3/8] Remove unused imports --- python/cudf/cudf/core/series.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index 1097bd51b09..43b72e51485 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -41,8 +41,7 @@ from cudf.core.groupby.groupby import SeriesGroupBy from cudf.core.index import BaseIndex, Index, RangeIndex, as_index from cudf.core.indexing import _SeriesIlocIndexer, _SeriesLocIndexer -from cudf.core.window import Rolling -from cudf.utils import cudautils, docutils, ioutils +from cudf.utils import cudautils, docutils from cudf.utils.docutils import copy_docstring from cudf.utils.dtypes import ( can_convert_to_column, From 66df2a8839992f45786494c58ce6850fb76ab227 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Thu, 19 Aug 2021 13:55:34 -0700 Subject: [PATCH 4/8] Use factory method to reconstruct objects in deserialize.
--- python/cudf/cudf/core/dataframe.py | 5 ++++- python/cudf/cudf/core/series.py | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index b085ca238b5..dd7cade1786 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -548,7 +548,10 @@ def deserialize(cls, header, frames): column_names = pickle.loads(header["column_names"]) columns = column.deserialize_columns(header["columns"], column_frames) - return cls(dict(zip(column_names, columns)), index=index) + return cls._from_data( + {name: col for name, col in zip(column_names, columns)}, + index=index, + ) @property def dtypes(self): diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index 43b72e51485..decb6d7ba1a 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -381,7 +381,7 @@ def deserialize(cls, header, frames): col_typ = pickle.loads(header["column"]["type-serialized"]) column = col_typ.deserialize(header["column"], frames[:column_nframes]) - return Series(column, index=index, name=name) + return cls._from_data({name: column}, index=index) def _get_columns_by_label(self, labels, downcast=False): """Return the column specified by `labels` From 3154c6b53660f34fb1a932258f058d1da5c4bc53 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Tue, 24 Aug 2021 09:45:47 -0700 Subject: [PATCH 5/8] removing column counts --- python/cudf/cudf/core/dataframe.py | 1 - 1 file changed, 1 deletion(-) diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index c97c6848ece..fc225aa42db 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -528,7 +528,6 @@ def serialize(self): header["columns"], column_frames = column.serialize_columns( self._columns ) - header["column_frame_count"] = len(column_frames) frames.extend(column_frames) header["column_names"] = 
pickle.dumps(tuple(self._data.names)) From 7ac66a008465f23922889e31579c57b258c8b1e8 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Tue, 24 Aug 2021 09:46:04 -0700 Subject: [PATCH 6/8] Update python/cudf/cudf/core/dataframe.py Co-authored-by: Vyas Ramasubramani --- python/cudf/cudf/core/dataframe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index c97c6848ece..5b354a876ff 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -549,7 +549,7 @@ def deserialize(cls, header, frames): columns = column.deserialize_columns(header["columns"], column_frames) return cls._from_data( - {name: col for name, col in zip(column_names, columns)}, + dict(zip(column_names, columns)), index=index, ) From a713705da76cf84371f64ec984776412b183370e Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Tue, 24 Aug 2021 10:23:28 -0700 Subject: [PATCH 7/8] Fix docstrings --- python/cudf/cudf/core/frame.py | 34 ++++++++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index 3b1ef23022f..8b3677212da 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -4594,7 +4594,7 @@ def head(self, n=5): Returns ------- - same type as caller + DataFrame or Series The first `n` rows of the caller object. See Also @@ -4603,6 +4603,9 @@ def head(self, n=5): Examples -------- + + **Series** + >>> ser = cudf.Series(['alligator', 'bee', 'falcon', ... 
'lion', 'monkey', 'parrot', 'shark', 'whale', 'zebra']) >>> ser @@ -4646,7 +4649,7 @@ def head(self, n=5): 5 parrot dtype: object - For Dataframe + **DataFrame** >>> df = cudf.DataFrame() >>> df['key'] = [0, 1, 2, 3, 4] @@ -4664,6 +4667,9 @@ def tail(self, n=5): Examples -------- + + **DataFrame** + >>> import cudf >>> df = cudf.DataFrame() >>> df['key'] = [0, 1, 2, 3, 4] @@ -4673,6 +4679,8 @@ def tail(self, n=5): 3 3 13.0 4 4 14.0 + **Series** + >>> import cudf >>> ser = cudf.Series([4, 3, 2, 1, 0]) >>> ser.tail(2) @@ -4707,8 +4715,10 @@ def nans_to_nulls(self): Examples -------- - >>> import cudf - >>> import numpy as np + + **Series** + + >>> import cudf, numpy as np >>> series = cudf.Series([1, 2, np.nan, None, 10], nan_as_null=False) >>> series 0 1.0 @@ -4724,6 +4734,22 @@ def nans_to_nulls(self): 3 4 10.0 dtype: float64 + + **DataFrame** + + >>> df = cudf.DataFrame() + >>> df['a'] = cudf.Series([1, None, np.nan], nan_as_null=False) + >>> df['b'] = cudf.Series([None, 3.14, np.nan], nan_as_null=False) + >>> df + a b + 0 1.0 + 1 3.14 + 2 NaN NaN + >>> df.nans_to_nulls() + a b + 0 1.0 + 1 3.14 + 2 """ return self._from_data( { From 5d085a3312d069cc91849ef21f9a80ae7345e9c5 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Tue, 24 Aug 2021 17:39:40 -0700 Subject: [PATCH 8/8] style --- python/cudf/cudf/core/dataframe.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 39e821791a1..eda9d6c992d 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -547,10 +547,7 @@ def deserialize(cls, header, frames): column_names = pickle.loads(header["column_names"]) columns = column.deserialize_columns(header["columns"], column_frames) - return cls._from_data( - dict(zip(column_names, columns)), - index=index, - ) + return cls._from_data(dict(zip(column_names, columns)), index=index,) @property def dtypes(self):