From 569bc7a35c06ef8eb9a121ca983ba8951399a124 Mon Sep 17 00:00:00 2001 From: Matt Kirk Date: Mon, 12 Feb 2018 19:06:11 +0700 Subject: [PATCH] BUG: assign doesn't cast SparseDataFrame to DataFrame (#19178) --- doc/source/whatsnew/v0.23.0.txt | 1 + pandas/core/sparse/array.py | 9 +++++---- pandas/tests/sparse/frame/test_frame.py | 11 +++++++++++ pandas/tests/sparse/test_array.py | 15 +++++++++++++++ 4 files changed, 32 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index acab9d0bbebf8..72f63a4da0f4d 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -822,6 +822,7 @@ Sparse - Bug in which creating a ``SparseDataFrame`` from a dense ``Series`` or an unsupported type raised an uncontrolled exception (:issue:`19374`) - Bug in :class:`SparseDataFrame.to_csv` causing exception (:issue:`19384`) - Bug in :class:`SparseSeries.memory_usage` which caused segfault by accessing non sparse elements (:issue:`19368`) +- Bug in constructing a ``SparseArray``: if ``data`` is a scalar and ``index`` is defined, it was coerced to ``float64`` regardless of the scalar's dtype. 
(:issue:`19163`) Reshaping ^^^^^^^^^ diff --git a/pandas/core/sparse/array.py b/pandas/core/sparse/array.py index 65aefd9fb8c0a..3cbae717d0e07 100644 --- a/pandas/core/sparse/array.py +++ b/pandas/core/sparse/array.py @@ -26,7 +26,8 @@ is_scalar, is_dtype_equal) from pandas.core.dtypes.cast import ( maybe_convert_platform, maybe_promote, - astype_nansafe, find_common_type) + astype_nansafe, find_common_type, infer_dtype_from_scalar, + construct_1d_arraylike_from_scalar) from pandas.core.dtypes.missing import isna, notna, na_value_for_dtype import pandas._libs.sparse as splib @@ -162,9 +163,9 @@ def __new__(cls, data, sparse_index=None, index=None, kind='integer', data = np.nan if not is_scalar(data): raise Exception("must only pass scalars with an index ") - values = np.empty(len(index), dtype='float64') - values.fill(data) - data = values + dtype = infer_dtype_from_scalar(data)[0] + data = construct_1d_arraylike_from_scalar( + data, len(index), dtype) if isinstance(data, ABCSparseSeries): data = data.values diff --git a/pandas/tests/sparse/frame/test_frame.py b/pandas/tests/sparse/frame/test_frame.py index 29fad3c8eefaf..0e8b2161cafc4 100644 --- a/pandas/tests/sparse/frame/test_frame.py +++ b/pandas/tests/sparse/frame/test_frame.py @@ -1257,3 +1257,14 @@ def test_quantile_multi(self): tm.assert_frame_equal(result, dense_expected) tm.assert_sp_frame_equal(result, sparse_expected) + + def test_assign_with_sparse_frame(self): + # GH 19163 + df = pd.DataFrame({"a": [1, 2, 3]}) + res = df.to_sparse(fill_value=False).assign(newcol=False) + exp = df.assign(newcol=False).to_sparse(fill_value=False) + + tm.assert_sp_frame_equal(res, exp) + + for column in res.columns: + assert type(res[column]) is SparseSeries diff --git a/pandas/tests/sparse/test_array.py b/pandas/tests/sparse/test_array.py index 8de93ff320961..6c0c83cf65ff7 100644 --- a/pandas/tests/sparse/test_array.py +++ b/pandas/tests/sparse/test_array.py @@ -113,6 +113,21 @@ def test_constructor_spindex_dtype(self): 
assert arr.dtype == np.int64 assert arr.fill_value == 0 + @pytest.mark.parametrize('scalar,dtype', [ + (False, bool), + (0.0, 'float64'), + (1, 'int64'), + ('z', 'object')]) + def test_scalar_with_index_infer_dtype(self, scalar, dtype): + # GH 19163 + arr = SparseArray(scalar, index=[1, 2, 3], fill_value=scalar) + exp = SparseArray([scalar, scalar, scalar], fill_value=scalar) + + tm.assert_sp_array_equal(arr, exp) + + assert arr.dtype == dtype + assert exp.dtype == dtype + def test_sparseseries_roundtrip(self): # GH 13999 for kind in ['integer', 'block']: