From 96508ca1fe4c2eb45f873c4d44c942f290e21e30 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Wed, 13 Mar 2019 08:23:21 -0500
Subject: [PATCH] API: concat on sparse values

API breaking change to `concat(List[DataFrame[Sparse]])` to
return a DataFrame with sparse values, rather than a SparseDataFrame.

Doing an outright break, rather than a deprecation, because I have a
followup PR deprecating SparseDataFrame. SparseDataFrame is currently
returned internally in a few places (e.g. get_dummies on all-sparse data).

Closes https://github.com/pandas-dev/pandas/issues/25702
---
 doc/source/whatsnew/v0.25.0.rst      | 36 ++++++++++++++++++++++++++++
 pandas/core/dtypes/concat.py         |  3 +--
 pandas/core/groupby/generic.py       |  8 +++++++
 pandas/tests/reshape/test_reshape.py | 10 ++++++++
 4 files changed, 55 insertions(+), 2 deletions(-)

diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst
index 1840c47b4054f..db0a4d00f95d9 100644
--- a/doc/source/whatsnew/v0.25.0.rst
+++ b/doc/source/whatsnew/v0.25.0.rst
@@ -64,6 +64,42 @@ is respected in indexing. (:issue:`24076`, :issue:`16785`)
     df = pd.DataFrame([0], index=pd.DatetimeIndex(['2019-01-01'], tz='US/Pacific'))
     df['2019-01-01 12:00:00+04:00':'2019-01-01 13:00:00+04:00']
 
+Concatenating Sparse Values
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+When passed DataFrames whose values are sparse, :func:`concat` will now return a
+:class:`DataFrame` with sparse values, rather than a ``SparseDataFrame`` (:issue:`25702`).
+
+.. ipython:: python
+
+   df = pd.DataFrame({"A": pd.SparseArray([0, 1])})
+
+*Previous Behavior:*
+
+.. code-block:: ipython
+
+   In [2]: type(pd.concat([df, df]))
+   pandas.core.sparse.frame.SparseDataFrame
+
+*New Behavior:*
+
+.. ipython:: python
+
+   type(pd.concat([df, df]))
+
+
+This now matches the existing behavior of :func:`concat` on ``Series`` with sparse values.
+:func:`concat` will continue to return a ``SparseDataFrame`` when all the values
+are instances of ``SparseDataFrame``.
+
+This change also affects routines using :func:`concat` internally, like :func:`get_dummies`,
+which now returns a :class:`DataFrame` in all cases (previously a ``SparseDataFrame`` was
+returned if all the columns were dummy encoded, and a :class:`DataFrame` otherwise).
+
+Providing any ``SparseSeries`` or ``SparseDataFrame`` to :func:`concat` will
+cause a ``SparseSeries`` or ``SparseDataFrame`` to be returned, as before.
+
+
 .. _whatsnew_0250.api_breaking.deps:
 
 Increased minimum versions for dependencies
diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py
index 10e903acbe538..7e765a38cedcd 100644
--- a/pandas/core/dtypes/concat.py
+++ b/pandas/core/dtypes/concat.py
@@ -89,8 +89,7 @@ def _get_frame_result_type(result, objs):
     """
 
     if (result.blocks and (
-            all(is_sparse(b) for b in result.blocks) or
-            all(isinstance(obj, ABCSparseDataFrame) for obj in objs))):
+            any(isinstance(obj, ABCSparseDataFrame) for obj in objs))):
         from pandas.core.sparse.api import SparseDataFrame
         return SparseDataFrame
     else:
diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index c5f9e52e07ecf..e665b4b646ed3 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -40,6 +40,7 @@
 import pandas.core.indexes.base as ibase
 from pandas.core.internals import BlockManager, make_block
 from pandas.core.series import Series
+from pandas.core.sparse.frame import SparseDataFrame
 
 from pandas.plotting._core import boxplot_frame_groupby
 
@@ -198,9 +199,16 @@ def aggregate(self, arg, *args, **kwargs):
                     assert not args and not kwargs
                     result = self._aggregate_multiple_funcs(
                         [arg], _level=_level, _axis=self.axis)
+
                     result.columns = Index(
                         result.columns.levels[0],
                         name=self._selected_obj.columns.name)
+
+                    if isinstance(self.obj, SparseDataFrame):
+                        # Backwards compat for groupby.agg() with sparse
+                        # values. concat no longer converts DataFrame[Sparse]
+                        # to SparseDataFrame, so we do it here.
+                        result = SparseDataFrame(result._data)
                 except Exception:
                     result = self._aggregate_generic(arg, *args, **kwargs)
 
diff --git a/pandas/tests/reshape/test_reshape.py b/pandas/tests/reshape/test_reshape.py
index a5b6cffd1d86c..9d75f5f4f2ca4 100644
--- a/pandas/tests/reshape/test_reshape.py
+++ b/pandas/tests/reshape/test_reshape.py
@@ -577,6 +577,16 @@ def test_get_dummies_duplicate_columns(self, df):
 
         tm.assert_frame_equal(result, expected)
 
+    def test_get_dummies_all_sparse(self):
+        df = pd.DataFrame({"A": [1, 2]})
+        result = pd.get_dummies(df, columns=['A'], sparse=True)
+        dtype = SparseDtype('uint8', 0)
+        expected = pd.DataFrame({
+            'A_1': SparseArray([1, 0], dtype=dtype),
+            'A_2': SparseArray([0, 1], dtype=dtype),
+        })
+        tm.assert_frame_equal(result, expected)
+
 
 class TestCategoricalReshape(object):