DEPR: Deprecate Series/DataFrame.to_dense/to_sparse (#26684)

VikramjeetD · jreback · commit 376a05e4d5b1 · 2019-06-18T20:41:02.000-04:00
diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst
@@ -503,6 +503,7 @@ Other Deprecations
 - The :meth:`Series.ftype`, :meth:`Series.ftypes` and :meth:`DataFrame.ftypes` methods are deprecated and will be removed in a future version.
   Instead, use :meth:`Series.dtype` and :meth:`DataFrame.dtypes` (:issue:`26705`).
 - :meth:`Timedelta.resolution` is deprecated and replaced with :meth:`Timedelta.resolution_string`.  In a future version, :meth:`Timedelta.resolution` will be changed to behave like the standard library :attr:`timedelta.resolution` (:issue:`21344`)
+- :meth:`Series.to_sparse`, :meth:`DataFrame.to_sparse`, :meth:`Series.to_dense` and :meth:`DataFrame.to_dense` are deprecated and will be removed in a future version (:issue:`26557`).
 
 .. _whatsnew_0250.prior_deprecations:
 
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -1889,6 +1889,8 @@ def to_sparse(self, fill_value=None, kind='block'):
         """
         Convert to SparseDataFrame.
 
+        .. deprecated:: 0.25.0
+
         Implement the sparse version of the DataFrame meaning that any data
         matching a specific value it's omitted in the representation.
         The sparse DataFrame allows for a more efficient storage.
@@ -1939,10 +1941,15 @@ def to_sparse(self, fill_value=None, kind='block'):
         >>> type(sdf)  # doctest: +SKIP
         <class 'pandas.core.sparse.frame.SparseDataFrame'>
         """
+        warnings.warn("DataFrame.to_sparse is deprecated and will be removed "
+                      "in a future version", FutureWarning, stacklevel=2)
+
         from pandas.core.sparse.api import SparseDataFrame
-        return SparseDataFrame(self._series, index=self.index,
-                               columns=self.columns, default_kind=kind,
-                               default_fill_value=fill_value)
+        with warnings.catch_warnings():
+            warnings.filterwarnings("ignore", message="SparseDataFrame")
+            return SparseDataFrame(self._series, index=self.index,
+                                   columns=self.columns, default_kind=kind,
+                                   default_fill_value=fill_value)
 
     @deprecate_kwarg(old_arg_name='encoding', new_arg_name=None)
     def to_stata(self, fname, convert_dates=None, write_index=True,
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -1940,11 +1940,16 @@ def to_dense(self):
         """
         Return dense representation of NDFrame (as opposed to sparse).
 
+        .. deprecated:: 0.25.0
+
         Returns
         -------
         %(klass)s
             Dense %(klass)s.
         """
+        warnings.warn("DataFrame/Series.to_dense is deprecated "
+                      "and will be removed in a future version",
+                      FutureWarning, stacklevel=2)
         # compat
         return self
 
diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
@@ -630,9 +630,9 @@ def _aggregate_series_fast(self, obj, func):
         group_index, _, ngroups = self.group_info
 
         # avoids object / Series creation overhead
-        dummy = obj._get_values(slice(None, 0)).to_dense()
+        dummy = obj._get_values(slice(None, 0))
         indexer = get_group_index_sorter(group_index, ngroups)
-        obj = obj._take(indexer).to_dense()
+        obj = obj._take(indexer)
         group_index = algorithms.take_nd(
             group_index, indexer, allow_fill=False)
         grouper = reduction.SeriesGrouper(obj, func, group_index, ngroups,
@@ -879,7 +879,7 @@ def apply(self, f):
 class SeriesSplitter(DataSplitter):
 
     def _chop(self, sdata, slice_obj):
-        return sdata._get_values(slice_obj).to_dense()
+        return sdata._get_values(slice_obj)
 
 
 class FrameSplitter(DataSplitter):
diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -1592,6 +1592,8 @@ def to_sparse(self, kind='block', fill_value=None):
         """
         Convert Series to SparseSeries.
 
+        .. deprecated:: 0.25.0
+
         Parameters
         ----------
         kind : {'block', 'integer'}, default 'block'
@@ -1603,12 +1605,17 @@ def to_sparse(self, kind='block', fill_value=None):
         SparseSeries
             Sparse representation of the Series.
         """
+
+        warnings.warn("Series.to_sparse is deprecated and will be removed "
+                      "in a future version", FutureWarning, stacklevel=2)
         from pandas.core.sparse.series import SparseSeries
 
         values = SparseArray(self, kind=kind, fill_value=fill_value)
-        return SparseSeries(
-            values, index=self.index, name=self.name
-        ).__finalize__(self)
+        with warnings.catch_warnings():
+            warnings.filterwarnings("ignore", message="SparseSeries")
+            return SparseSeries(
+                values, index=self.index, name=self.name
+            ).__finalize__(self)
 
     def _set_name(self, name, inplace=False):
         """
diff --git a/pandas/tests/arrays/sparse/test_arithmetics.py b/pandas/tests/arrays/sparse/test_arithmetics.py
@@ -9,6 +9,7 @@
 
 
 @pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
+@pytest.mark.filterwarnings("ignore:Series.to_sparse:FutureWarning")
 class TestSparseArrayArithmetics:
 
     _base = np.array
diff --git a/pandas/tests/generic/test_generic.py b/pandas/tests/generic/test_generic.py
@@ -918,3 +918,17 @@ def test_axis_classmethods(self, box):
             assert obj._get_axis_name(v) == box._get_axis_name(v)
             assert obj._get_block_manager_axis(v) == \
                 box._get_block_manager_axis(v)
+
+    def test_deprecated_to_dense(self):
+        # GH 26557: DEPR
+        # Deprecated 0.25.0
+
+        df = pd.DataFrame({"A": [1, 2, 3]})
+        with tm.assert_produces_warning(FutureWarning):
+            result = df.to_dense()
+        tm.assert_frame_equal(result, df)
+
+        ser = pd.Series([1, 2, 3])
+        with tm.assert_produces_warning(FutureWarning):
+            result = ser.to_dense()
+        tm.assert_series_equal(result, ser)
diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py
@@ -1013,6 +1013,8 @@ def test_datetime_tz(self):
         assert stz.to_json() == s_naive.to_json()
 
     @pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
+    @pytest.mark.filterwarnings("ignore:DataFrame.to_sparse:FutureWarning")
+    @pytest.mark.filterwarnings("ignore:Series.to_sparse:FutureWarning")
     def test_sparse(self):
         # GH4377 df.to_json segfaults with non-ndarray blocks
         df = pd.DataFrame(np.random.randn(10, 4))
diff --git a/pandas/tests/io/test_packers.py b/pandas/tests/io/test_packers.py
@@ -551,6 +551,8 @@ def test_dataframe_duplicate_column_names(self):
 
 
 @pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
+@pytest.mark.filterwarnings("ignore:Series.to_sparse:FutureWarning")
+@pytest.mark.filterwarnings("ignore:DataFrame.to_sparse:FutureWarning")
 class TestSparse(TestPackers):
 
     def _check_roundtrip(self, obj, comparator, **kwargs):
diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py
@@ -51,6 +51,12 @@
     "ignore:object name:tables.exceptions.NaturalNameWarning"
 )
 ignore_sparse = pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
+ignore_dataframe_tosparse = pytest.mark.filterwarnings(
+    "ignore:DataFrame.to_sparse:FutureWarning"
+)
+ignore_series_tosparse = pytest.mark.filterwarnings(
+    "ignore:Series.to_sparse:FutureWarning"
+)
 
 # contextmanager to ensure the file cleanup
 
@@ -2245,6 +2251,7 @@ def test_series(self):
                               check_index_type=False)
 
     @ignore_sparse
+    @ignore_series_tosparse
     def test_sparse_series(self):
 
         s = tm.makeStringSeries()
@@ -2262,6 +2269,7 @@ def test_sparse_series(self):
                               check_series_type=True)
 
     @ignore_sparse
+    @ignore_dataframe_tosparse
     def test_sparse_frame(self):
 
         s = tm.makeDataFrame()
@@ -2601,6 +2609,7 @@ def test_overwrite_node(self):
             tm.assert_series_equal(store['a'], ts)
 
     @ignore_sparse
+    @ignore_dataframe_tosparse
     def test_sparse_with_compression(self):
 
         # GH 2931
@@ -3746,6 +3755,7 @@ def test_start_stop_multiple(self):
             tm.assert_frame_equal(result, expected)
 
     @ignore_sparse
+    @ignore_dataframe_tosparse
     def test_start_stop_fixed(self):
 
         with ensure_clean_store(self.path) as store:
diff --git a/pandas/tests/series/test_api.py b/pandas/tests/series/test_api.py
@@ -123,6 +123,7 @@ def test_sort_index_name(self):
         assert result.name == self.ts.name
 
     @pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
+    @pytest.mark.filterwarnings("ignore:Series.to_sparse:FutureWarning")
     def test_to_sparse_pass_name(self):
         result = self.ts.to_sparse()
         assert result.name == self.ts.name
diff --git a/pandas/tests/series/test_combine_concat.py b/pandas/tests/series/test_combine_concat.py
@@ -212,6 +212,7 @@ def test_combine_first_dt_tz_values(self, tz_naive_fixture):
         assert_series_equal(exp, result)
 
     @pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
+    @pytest.mark.filterwarnings("ignore:Series.to_sparse:FutureWarning")
     def test_concat_empty_series_dtypes(self):
 
         # booleans
@@ -244,16 +245,16 @@ def test_concat_empty_series_dtypes(self):
 
         # sparse
         # TODO: move?
-        result = pd.concat([Series(dtype='float64').to_sparse(), Series(
-            dtype='float64').to_sparse()])
+        result = pd.concat([Series(dtype='float64').to_sparse(),
+                            Series(dtype='float64').to_sparse()])
         assert result.dtype == 'Sparse[float64]'
 
         # GH 26705 - Assert .ftype is deprecated
         with tm.assert_produces_warning(FutureWarning):
             assert result.ftype == 'float64:sparse'
 
-        result = pd.concat([Series(dtype='float64').to_sparse(), Series(
-            dtype='float64')])
+        result = pd.concat([Series(dtype='float64').to_sparse(),
+                            Series(dtype='float64')])
         # TODO: release-note: concat sparse dtype
         expected = pd.core.sparse.api.SparseDtype(np.float64)
         assert result.dtype == expected
@@ -262,8 +263,8 @@ def test_concat_empty_series_dtypes(self):
         with tm.assert_produces_warning(FutureWarning):
             assert result.ftype == 'float64:sparse'
 
-        result = pd.concat([Series(dtype='float64').to_sparse(), Series(
-            dtype='object')])
+        result = pd.concat([Series(dtype='float64').to_sparse(),
+                            Series(dtype='object')])
         # TODO: release-note: concat sparse dtype
         expected = pd.core.sparse.api.SparseDtype('object')
         assert result.dtype == expected
diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py
@@ -781,6 +781,7 @@ def test_series_fillna_limit(self):
         assert_series_equal(result, expected)
 
     @pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
+    @pytest.mark.filterwarnings("ignore:Series.to_sparse:FutureWarning")
     def test_sparse_series_fillna_limit(self):
         index = np.arange(10)
         s = Series(np.random.randn(10), index=index)
@@ -809,6 +810,7 @@ def test_sparse_series_fillna_limit(self):
         assert_series_equal(result, expected)
 
     @pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
+    @pytest.mark.filterwarnings("ignore:Series.to_sparse:FutureWarning")
     def test_sparse_series_pad_backfill_limit(self):
         index = np.arange(10)
         s = Series(np.random.randn(10), index=index)
diff --git a/pandas/tests/sparse/frame/test_apply.py b/pandas/tests/sparse/frame/test_apply.py
@@ -38,6 +38,7 @@ def fill_frame(frame):
 
 
 @pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
+@pytest.mark.filterwarnings("ignore:Series.to_sparse:FutureWarning")
 def test_apply(frame):
     applied = frame.apply(np.sqrt)
     assert isinstance(applied, SparseDataFrame)
@@ -72,6 +73,7 @@ def test_apply_empty(empty):
 
 
 @pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
+@pytest.mark.filterwarnings("ignore:DataFrame.to_sparse:FutureWarning")
 def test_apply_nonuq():
     orig = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]],
                      index=['a', 'a', 'c'])
diff --git a/pandas/tests/sparse/frame/test_frame.py b/pandas/tests/sparse/frame/test_frame.py
@@ -25,6 +25,8 @@ def test_deprecated():
 
 
 @pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
+@pytest.mark.filterwarnings("ignore:Series.to_sparse:FutureWarning")
+@pytest.mark.filterwarnings("ignore:DataFrame.to_sparse:FutureWarning")
 class TestSparseDataFrame(SharedWithSparse):
     klass = SparseDataFrame
 
@@ -348,6 +350,18 @@ def test_dense_to_sparse(self):
         assert sdf.default_fill_value == 0
         tm.assert_frame_equal(sdf.to_dense(), df)
 
+    def test_deprecated_dense_to_sparse(self):
+        # GH 26557
+        # Deprecated 0.25.0
+
+        df = pd.DataFrame({"A": [1, np.nan, 3]})
+        sparse_df = pd.SparseDataFrame({"A": [1, np.nan, 3]})
+
+        with tm.assert_produces_warning(FutureWarning,
+                                        check_stacklevel=False):
+            result = df.to_sparse()
+        tm.assert_frame_equal(result, sparse_df)
+
     def test_density(self):
         df = SparseSeries([nan, nan, nan, 0, 1, 2, 3, 4, 5, 6])
         assert df.density == 0.7
@@ -1294,6 +1308,7 @@ def test_default_fill_value_with_no_data(self):
 
 
 @pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
+@pytest.mark.filterwarnings("ignore:DataFrame.to_sparse:FutureWarning")
 class TestSparseDataFrameArithmetic:
 
     def test_numeric_op_scalar(self):
@@ -1324,6 +1339,7 @@ def test_comparison_op_scalar(self):
 
 
 @pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
+@pytest.mark.filterwarnings("ignore:DataFrame.to_sparse:FutureWarning")
 class TestSparseDataFrameAnalytics:
 
     def test_cumsum(self, float_frame):
diff --git a/pandas/tests/sparse/frame/test_to_csv.py b/pandas/tests/sparse/frame/test_to_csv.py
@@ -6,6 +6,7 @@
 
 
 @pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
+@pytest.mark.filterwarnings("ignore:DataFrame.to_sparse:FutureWarning")
 class TestSparseDataFrameToCsv:
     fill_values = [np.nan, 0, None, 1]
 
diff --git a/pandas/tests/sparse/frame/test_to_from_scipy.py b/pandas/tests/sparse/frame/test_to_from_scipy.py
@@ -174,6 +174,7 @@ def test_from_scipy_fillna(spmatrix):
 
 
 @pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
+@pytest.mark.filterwarnings("ignore:Series.to_sparse:FutureWarning")
 def test_index_names_multiple_nones():
     # https://github.com./pandas-dev/pandas/pull/24092
     sparse = pytest.importorskip("scipy.sparse")
diff --git a/pandas/tests/sparse/series/test_series.py b/pandas/tests/sparse/series/test_series.py
@@ -61,6 +61,7 @@ def _test_data2_zero():
 
 
 @pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
+@pytest.mark.filterwarnings("ignore:Series.to_sparse:FutureWarning")
 class TestSparseSeries(SharedWithSparse):
 
     series_klass = SparseSeries
@@ -1045,6 +1046,7 @@ def test_memory_usage_deep(self, deep, fill_value):
 
 
 @pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
+@pytest.mark.filterwarnings("ignore:DataFrame.to_sparse:FutureWarning")
 class TestSparseHandlingMultiIndexes:
 
     def setup_method(self, method):
@@ -1076,6 +1078,7 @@ def test_round_trip_preserve_multiindex_names(self):
     "ignore:the matrix subclass:PendingDeprecationWarning"
 )
 @pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
+@pytest.mark.filterwarnings("ignore:Series.to_sparse:FutureWarning")
 class TestSparseSeriesScipyInteraction:
     # Issue 8048: add SparseSeries coo methods
 
@@ -1444,6 +1447,7 @@ def _dense_series_compare(s, f):
 
 
 @pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
+@pytest.mark.filterwarnings("ignore:Series.to_sparse:FutureWarning")
 class TestSparseSeriesAnalytics:
 
     def setup_method(self, method):
@@ -1538,6 +1542,7 @@ def test_constructor_dict_datetime64_index(datetime_type):
 
 
 @pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
+@pytest.mark.filterwarnings("ignore:Series.to_sparse:FutureWarning")
 def test_to_sparse():
     # https://github.com./pandas-dev/pandas/issues/22389
     arr = pd.SparseArray([1, 2, None, 3])
@@ -1546,6 +1551,20 @@ def test_to_sparse():
     tm.assert_sp_array_equal(result.values, arr, check_kind=False)
 
 
+@pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
+def test_deprecated_to_sparse():
+    # GH 26557
+    # Deprecated 0.25.0
+
+    ser = Series([1, np.nan, 3])
+    sparse_ser = pd.SparseSeries([1, np.nan, 3])
+
+    with tm.assert_produces_warning(FutureWarning,
+                                    check_stacklevel=False):
+        result = ser.to_sparse()
+    tm.assert_series_equal(result, sparse_ser)
+
+
 @pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
 def test_constructor_mismatched_raises():
     msg = "Length of passed values is 2, index implies 3"
diff --git a/pandas/tests/sparse/test_combine_concat.py b/pandas/tests/sparse/test_combine_concat.py
diff --git a/pandas/tests/sparse/test_format.py b/pandas/tests/sparse/test_format.py
diff --git a/pandas/tests/sparse/test_groupby.py b/pandas/tests/sparse/test_groupby.py
diff --git a/pandas/tests/sparse/test_indexing.py b/pandas/tests/sparse/test_indexing.py
diff --git a/pandas/tests/sparse/test_pivot.py b/pandas/tests/sparse/test_pivot.py