Skip to content

Fix reading Series with read_hdf #16610

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jun 19, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.20.3.txt
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ I/O
^^^

-- Bug in ``pd.read_csv()`` in which files weren't opened as binary files by the C engine on Windows, causing EOF characters mid-field, which would fail (:issue:`16039`, :issue:`16559`, :issue:`16675`)
-- Bug in ``pd.read_hdf()`` in which reading a ``Series`` saved to an HDF file in 'fixed' format fails when an explicit ``mode='r'`` argument is supplied (:issue:`16583`)

Plotting
^^^^^^^^
Expand Down
16 changes: 9 additions & 7 deletions pandas/io/pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -282,21 +282,24 @@ def to_hdf(path_or_buf, key, value, mode=None, complevel=None, complib=None,
f(path_or_buf)


def read_hdf(path_or_buf, key=None, **kwargs):
def read_hdf(path_or_buf, key=None, mode='r', **kwargs):
""" read from the store, close it if we opened it

Retrieve pandas object stored in file, optionally based on where
criteria

Parameters
----------
path_or_buf : path (string), buffer, or path object (pathlib.Path or
py._path.local.LocalPath) to read from
path_or_buf : path (string), buffer or path object (pathlib.Path or
py._path.local.LocalPath) designating the file to open, or an
already opened pd.HDFStore object

.. versionadded:: 0.19.0 support for pathlib, py.path.

key : group identifier in the store. Can be omitted if the HDF file
contains a single pandas object.
mode : string, {'r', 'r+', 'a'}, default 'r'. Mode to use when opening
the file. Ignored if path_or_buf is a pd.HDFStore.
where : list of Term (or convertible) objects, optional
start : optional, integer (defaults to None), row number to start
selection
Expand All @@ -313,10 +316,9 @@ def read_hdf(path_or_buf, key=None, **kwargs):

"""

if kwargs.get('mode', 'a') not in ['r', 'r+', 'a']:
if mode not in ['r', 'r+', 'a']:
raise ValueError('mode {0} is not allowed while performing a read. '
'Allowed modes are r, r+ and a.'
.format(kwargs.get('mode')))
'Allowed modes are r, r+ and a.'.format(mode))
# grab the scope
if 'where' in kwargs:
kwargs['where'] = _ensure_term(kwargs['where'], scope_level=1)
Expand All @@ -343,7 +345,7 @@ def read_hdf(path_or_buf, key=None, **kwargs):
raise compat.FileNotFoundError(
'File %s does not exist' % path_or_buf)

store = HDFStore(path_or_buf, **kwargs)
store = HDFStore(path_or_buf, mode=mode, **kwargs)
# can't auto open/close if we are using an iterator
# so delegate to the iterator
auto_close = True
Expand Down
11 changes: 11 additions & 0 deletions pandas/tests/io/test_pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -5247,6 +5247,17 @@ def test_query_compare_column_type(self):
expected = df.loc[[], :]
tm.assert_frame_equal(expected, result)

@pytest.mark.parametrize('format', ['fixed', 'table'])
def test_read_hdf_series_mode_r(self, format):
    """GH 16583: round-trip a Series through HDF with an explicit mode='r'.

    A float Series is written under the key 'data' in the parametrized
    storage format, read back via ``pd.read_hdf(..., mode='r')`` and
    compared with the Series that was written.
    """
    expected = tm.makeFloatSeries()
    with ensure_clean_path(self.path) as path:
        # Write with the parametrized format ('fixed' or 'table').
        expected.to_hdf(path, key='data', format=format)
        # Passing mode='r' explicitly is the case under test.
        roundtripped = pd.read_hdf(path, key='data', mode='r')
        tm.assert_series_equal(roundtripped, expected)

@pytest.mark.skipif(sys.version_info < (3, 6), reason="Need python 3.6")
def test_fspath(self):
with tm.ensure_clean('foo.h5') as path:
Expand Down