Skip to content

add compression support for 'read_pickle' and 'to_pickle' #13317

Closed
wants to merge 11 commits into from
39 changes: 39 additions & 0 deletions doc/source/io.rst
@@ -2926,6 +2926,45 @@ any pickled pandas object (or any other pickled object) from file:

These methods were previously ``pd.save`` and ``pd.load``, prior to 0.12.0, and are now deprecated.

.. _io.pickle.compression:

Read/Write compressed pickle files
''''''''''''''''''''''''''''''''''

.. versionadded:: 0.20.0

:func:`read_pickle`, :meth:`DataFrame.to_pickle` and :meth:`Series.to_pickle` can
read and write compressed pickle files. The compression types ``gzip``, ``bz2``, and
``xz`` support both reading and writing. The ``zip`` format supports reading only,
and the archive must contain exactly one data file. The compression type can be
passed as an explicit parameter or be inferred from the file extension: if
``'infer'``, then ``gzip``, ``bz2``, ``zip``, or ``xz`` is used when the filename
ends in ``'.gz'``, ``'.bz2'``, ``'.zip'``, or ``'.xz'``, respectively. A short
``zip`` example follows the snippets below.

.. ipython:: python

   df = pd.DataFrame({
       'A': np.random.randn(1000),
       'B': np.random.randn(1000),
       'C': np.random.randn(1000)})
   df.to_pickle("data.pkl.compress", compression="gzip")  # explicit compression type
   df.to_pickle("data.pkl.xz", compression="infer")       # infer compression type from extension
   df.to_pickle("data.pkl.gz")                            # default, using "infer"
   df["A"].to_pickle("s1.pkl.bz2")

   df = pd.read_pickle("data.pkl.compress", compression="gzip")
   df = pd.read_pickle("data.pkl.xz", compression="infer")
   df = pd.read_pickle("data.pkl.gz")
   s = pd.read_pickle("s1.pkl.bz2")

.. ipython:: python
   :suppress:

   import os
   os.remove("data.pkl.compress")
   os.remove("data.pkl.xz")
   os.remove("data.pkl.gz")
   os.remove("s1.pkl.bz2")

.. _io.msgpack:

msgpack
34 changes: 34 additions & 0 deletions doc/source/whatsnew/v0.20.0.txt
@@ -97,6 +97,40 @@ support for bz2 compression in the python 2 c-engine improved (:issue:`14874`).
   df = pd.read_table(url, compression='bz2')  # explicitly specify compression
   df.head(2)

.. _whatsnew_0200.enhancements.pickle_compression:

Pickle file I/O now supports compression
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

:func:`read_pickle`, :meth:`DataFrame.to_pickle` and :meth:`Series.to_pickle`
can now read from and write to compressed pickle files. The compression type
can be passed as an explicit parameter or be inferred from the file extension.
See :ref:`Read/Write compressed pickle files <io.pickle.compression>`.

.. ipython:: python

   df = pd.DataFrame({
       'A': np.random.randn(1000),
       'B': np.random.randn(1000),
       'C': np.random.randn(1000)})
   df.to_pickle("data.pkl.compress", compression="gzip")  # explicit compression type
   df.to_pickle("data.pkl.xz", compression="infer")       # infer compression type from extension
   df.to_pickle("data.pkl.gz")                            # default, using "infer"
   df["A"].to_pickle("s1.pkl.bz2")

   df = pd.read_pickle("data.pkl.compress", compression="gzip")
   df = pd.read_pickle("data.pkl.xz", compression="infer")
   df = pd.read_pickle("data.pkl.gz")
   s = pd.read_pickle("s1.pkl.bz2")

.. ipython:: python
   :suppress:

   import os
   os.remove("data.pkl.compress")
   os.remove("data.pkl.xz")
   os.remove("data.pkl.gz")
   os.remove("s1.pkl.bz2")

.. _whatsnew_0200.enhancements.uint64_support:

UInt64 Support Improved
8 changes: 6 additions & 2 deletions pandas/core/generic.py
@@ -1278,17 +1278,21 @@ def to_sql(self, name, con, flavor=None, schema=None, if_exists='fail',
                         if_exists=if_exists, index=index, index_label=index_label,
                         chunksize=chunksize, dtype=dtype)

    def to_pickle(self, path):
    def to_pickle(self, path, compression='infer'):
        """
        Pickle (serialize) object to input file path.

        Parameters
        ----------
        path : string
            File path
        compression : {'infer', 'gzip', 'bz2', 'xz', None}, default 'infer'
Contributor

need a versionadded tag

            a string representing the compression to use in the output file

            .. versionadded:: 0.20.0
        """
        from pandas.io.pickle import to_pickle
        return to_pickle(self, path)
        return to_pickle(self, path, compression=compression)

    def to_clipboard(self, excel=None, sep=None, **kwargs):
        """
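As an editorial aside, a quick round-trip check of the new ``compression``
keyword documented above (the file name and the equality check are
illustrative, not from the PR):

import numpy as np
import pandas as pd

df = pd.DataFrame({'A': np.random.randn(100)})
df.to_pickle("tmp.pkl.xz", compression="infer")  # "infer" selects xz from the extension
out = pd.read_pickle("tmp.pkl.xz")               # inference applies on the read side too
assert df.equals(out)                            # the round trip preserves the frame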
14 changes: 10 additions & 4 deletions pandas/io/common.py
@@ -305,7 +305,7 @@ def _infer_compression(filepath_or_buffer, compression):


def _get_handle(path_or_buf, mode, encoding=None, compression=None,
                memory_map=False):
                memory_map=False, is_text=True):
    """
    Get file handle for given path/buffer and mode.

@@ -320,7 +320,9 @@ def _get_handle(path_or_buf, mode, encoding=None, compression=None,
        Supported compression protocols are gzip, bz2, zip, and xz
    memory_map : boolean, default False
        See parsers._parser_params for more information.
    is_text : boolean, default True
        whether the file/buffer is in text format (csv, json, etc.) or in
        binary mode (pickle, etc.)

    Returns
    -------
    f : file-like
@@ -394,13 +396,17 @@
        elif encoding:
            # Python 3 and encoding
            f = open(path_or_buf, mode, encoding=encoding)
        else:
        elif is_text:
            # Python 3 and no explicit encoding
            f = open(path_or_buf, mode, errors='replace')
        else:
            # Python 3 and binary mode
            f = open(path_or_buf, mode)
    handles.append(f)

    # in Python 3, convert BytesIO or fileobjects passed with an encoding
    if compat.PY3 and is_text and\
Contributor
Is there an opportunity to simplify things? What is the relationship between need_text_wrapping, compression, and is_text --- I will think about this, but I think currently, the logic for when TextIOWrapper gets applied is confusing.

Contributor Author

_get_handle needs to deal with various situations:

  • py2 or py3
  • binary (pickle, msgpack) or text (csv)
  • if text, what's the encoding
  • compression
  • memory map
  • open for read or write

maybe we can split _get_handle into two or more functions to make each single function simpler?

Contributor

I opened #15008: would love it if you could migrate your above comment to that issue. This way we can keep this pull request focused and minimalist.

            (compression or isinstance(f, need_text_wrapping)):
        from io import TextIOWrapper
        f = TextIOWrapper(f, encoding=encoding)
        handles.append(f)
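Editorial aside on the thread above: one possible shape for the split the
author floats, sketched with hypothetical helper names (not code from this
PR):

from io import TextIOWrapper

def _should_wrap_text(f, compression, is_text, need_text_wrapping):
    # On Python 3, compressed streams and raw byte buffers yield bytes; wrap
    # them in a text layer only when the caller wants text (csv, json, ...).
    return is_text and (compression or isinstance(f, need_text_wrapping))

def _wrap_text(f, encoding, handles):
    # Add the encoding layer and track the wrapper so it is closed together
    # with the underlying handle.
    f = TextIOWrapper(f, encoding=encoding)
    handles.append(f)
    return f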
52 changes: 41 additions & 11 deletions pandas/io/pickle.py
@@ -4,9 +4,10 @@
from numpy.lib.format import read_array, write_array
from pandas.compat import BytesIO, cPickle as pkl, pickle_compat as pc, PY3
from pandas.types.common import is_datetime64_dtype, _NS_DTYPE
from pandas.io.common import _get_handle, _infer_compression


def to_pickle(obj, path):
def to_pickle(obj, path, compression='infer'):
"""
Pickle (serialize) object to input file path

@@ -15,12 +16,23 @@ def to_pickle(obj, path):
    obj : any object
    path : string
        File path
    compression : {'infer', 'gzip', 'bz2', 'xz', None}, default 'infer'
Contributor

add a versionadded

        a string representing the compression to use in the output file

        .. versionadded:: 0.20.0
    """
    with open(path, 'wb') as f:
    inferred_compression = _infer_compression(path, compression)
    f, fh = _get_handle(path, 'wb',
                        compression=inferred_compression,
                        is_text=False)
    try:
        pkl.dump(obj, f, protocol=pkl.HIGHEST_PROTOCOL)
    finally:
        for _f in fh:
            _f.close()


def read_pickle(path):
def read_pickle(path, compression='infer'):
"""
Load pickled pandas object (or any other pickled object) from the specified
file path
@@ -32,12 +44,32 @@ def read_pickle(path):
    ----------
    path : string
Contributor

It shouldn't be too hard to get read_pickle to also support reading from URLs. @goldenbull -- not sure if you are also interested in adding this feature. Ultimately all read methods should support compression and URL reading.

Contributor Author

the original API doesn't support URL reading; this feature can be added in the future.

Member

Would be a nice enhancement (and you're welcome to work on this!), but let's leave that for another PR.

Contributor

> let's leave that for another PR

It would fit well with a pull request to accomplish #15008.

        File path
    compression : {'infer', 'gzip', 'bz2', 'xz', 'zip', None}, default 'infer'
        For on-the-fly decompression of on-disk data. If 'infer', then use
        gzip, bz2, xz or zip if path is a string ending in '.gz', '.bz2',
        '.xz', or '.zip' respectively, and no decompression otherwise.
        Set to None for no decompression.

        .. versionadded:: 0.20.0

    Returns
    -------
    unpickled : type of object stored in file
    """

    inferred_compression = _infer_compression(path, compression)

    def read_wrapper(func):
        # wrap the file-handle open/close bookkeeping around func
        f, fh = _get_handle(path, 'rb',
                            compression=inferred_compression,
                            is_text=False)
        try:
            return func(f)
        finally:
            for _f in fh:
                _f.close()

    def try_read(path, encoding=None):
        # try with cPickle
        # try with current pickle, if we have a Type Error then
@@ -48,26 +80,24 @@ def try_read(path, encoding=None):
        # cpickle
        # GH 6899
Contributor

so these routines need to be changed to read in the file once, e.g.

f, fh = _get_handle(....)
try:
    buffer = BytesIO(f.read())
finally:
    for _f in fh:
        _f.close()

then the operations are all

try:
    buffer.seek(0)
    pc.load(buffer, ....)
except:
    ...

etc, IOW, all we do is seek to the beginning of the buffer each time, rather than read the file in (potentially) 4 times.

        try:
            with open(path, 'rb') as fh:
                return pkl.load(fh)
            return read_wrapper(lambda f: pkl.load(f))
        except Exception:
            # reg/patched pickle
            try:
                with open(path, 'rb') as fh:
                    return pc.load(fh, encoding=encoding, compat=False)
                return read_wrapper(
                    lambda f: pc.load(f, encoding=encoding, compat=False))
            # compat pickle
            except:
                with open(path, 'rb') as fh:
                    return pc.load(fh, encoding=encoding, compat=True)
                return read_wrapper(
                    lambda f: pc.load(f, encoding=encoding, compat=True))

    try:
        return try_read(path)
    except:
        if PY3:
            return try_read(path, encoding='latin1')
        raise


# compat with sparse pickle / unpickle

