From bbb279740f6a683b38954d00888a9a755983135d Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Mon, 20 Sep 2021 13:32:55 -0700 Subject: [PATCH 1/2] fix duplicate names issues in deserialization --- python/cudf/cudf/core/multiindex.py | 6 ++--- python/cudf/cudf/tests/test_multiindex.py | 31 +++++++++++++++++++++++ 2 files changed, 34 insertions(+), 3 deletions(-) diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py index 84566b4627c..fba857694e8 100644 --- a/python/cudf/cudf/core/multiindex.py +++ b/python/cudf/cudf/core/multiindex.py @@ -977,10 +977,10 @@ def deserialize(cls, header, frames): ) df = cudf.DataFrame.deserialize(header["source_data"], frames) obj = cls.from_frame(df) - obj._set_names(names) - return obj + return obj._set_names(names) columns = column.deserialize_columns(header["columns"], frames) - return cls._from_data(dict(zip(names, columns))) + obj = cls._from_data(dict(zip(range(0, len(names)), columns))) + return obj._set_names(names) def __getitem__(self, index): match = self.take(index) diff --git a/python/cudf/cudf/tests/test_multiindex.py b/python/cudf/cudf/tests/test_multiindex.py index 465cf36e1f3..f5fc0543363 100644 --- a/python/cudf/cudf/tests/test_multiindex.py +++ b/python/cudf/cudf/tests/test_multiindex.py @@ -5,7 +5,9 @@ """ import itertools import operator +import pickle import re +from io import BytesIO import cupy as cp import numpy as np @@ -1553,3 +1555,32 @@ def test_multiIndex_duplicate_names(): ) assert_eq(gi, pi) + + +@pytest.mark.parametrize( + "names", + [ + ["a", "b", "c"], + [None, None, None], + ["aa", "aa", "aa"], + ["bb", "aa", "aa"], + None, + ], +) +def test_pickle_rountrip_multiIndex(names): + df = cudf.DataFrame( + { + "one": [1, 2, 3], + "two": [True, False, True], + "three": ["ab", "cd", "ef"], + "four": [0.2, 0.1, -10.2], + } + ) + actual_df = df.set_index(["one", "two", "three"]) + actual_df.index.names = names + local_file = BytesIO() + + pickle.dump(actual_df, local_file) 
+ local_file.seek(0) + expected_df = pickle.load(local_file) + assert_eq(expected_df, actual_df) From ce5f6a7ca75a65bcbe82725fbbc696dcb191b20a Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Mon, 20 Sep 2021 17:08:00 -0500 Subject: [PATCH 2/2] Update test_multiindex.py --- python/cudf/cudf/tests/test_multiindex.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/python/cudf/cudf/tests/test_multiindex.py b/python/cudf/cudf/tests/test_multiindex.py index f5fc0543363..981ab8b63b9 100644 --- a/python/cudf/cudf/tests/test_multiindex.py +++ b/python/cudf/cudf/tests/test_multiindex.py @@ -1576,11 +1576,11 @@ def test_pickle_rountrip_multiIndex(names): "four": [0.2, 0.1, -10.2], } ) - actual_df = df.set_index(["one", "two", "three"]) - actual_df.index.names = names + expected_df = df.set_index(["one", "two", "three"]) + expected_df.index.names = names local_file = BytesIO() - pickle.dump(actual_df, local_file) + pickle.dump(expected_df, local_file) local_file.seek(0) - expected_df = pickle.load(local_file) + actual_df = pickle.load(local_file) assert_eq(expected_df, actual_df)