Skip to content

Commit aabcf90

Browse files
committed
COMPAT: avoid calling getsizeof() on PyPy
1 parent 3e9e947 commit aabcf90

File tree

9 files changed

+77
-33
lines changed

9 files changed

+77
-33
lines changed

Diff for: doc/source/whatsnew/v0.21.0.txt

+3-1
Original file line numberDiff line numberDiff line change
@@ -307,8 +307,10 @@ Bug Fixes
307307
Conversion
308308
^^^^^^^^^^
309309

310-
- Bug in assignment against datetime-like data with ``int`` may incorrectly converte to datetime-like (:issue:`14145`)
310+
- Bug in assignment against datetime-like data with ``int`` may incorrectly convert to datetime-like (:issue:`14145`)
311311
- Bug in assignment against ``int64`` data with ``np.ndarray`` with ``float64`` dtype may keep ``int64`` dtype (:issue:`14001`)
312+
- Fix ``memory_usage`` to support PyPy. Objects on PyPy do not have a
313+
fixed size, so an approximation is used instead (:issue:`17228`)
312314

313315

314316
Indexing

Diff for: pandas/compat/__init__.py

+2
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
from distutils.version import LooseVersion
3232
from itertools import product
3333
import sys
34+
import platform
3435
import types
3536
from unicodedata import east_asian_width
3637
import struct
@@ -41,6 +42,7 @@
4142
PY3 = (sys.version_info[0] >= 3)
4243
PY35 = (sys.version_info >= (3, 5))
4344
PY36 = (sys.version_info >= (3, 6))
45+
PYPY = (platform.python_implementation() == 'PyPy')
4446

4547
try:
4648
import __builtin__ as builtins

Diff for: pandas/core/base.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
import pandas.core.nanops as nanops
1616
import pandas._libs.lib as lib
1717
from pandas.compat.numpy import function as nv
18+
from pandas.compat import PYPY
1819
from pandas.util._decorators import (Appender, cache_readonly,
1920
deprecate_kwarg, Substitution)
2021
from pandas.core.common import AbstractMethodError
@@ -1061,7 +1062,7 @@ def memory_usage(self, deep=False):
10611062
Notes
10621063
-----
10631064
Memory usage does not include memory consumed by elements that
1064-
are not components of the array if deep=False
1065+
are not components of the array if deep=False or if used on PyPy
10651066
10661067
See Also
10671068
--------
@@ -1071,9 +1072,8 @@ def memory_usage(self, deep=False):
10711072
return self.values.memory_usage(deep=deep)
10721073

10731074
v = self.values.nbytes
1074-
if deep and is_object_dtype(self):
1075+
if deep and is_object_dtype(self) and not PYPY:
10751076
v += lib.memory_usage_of_objects(self.values)
1076-
10771077
return v
10781078

10791079
def factorize(self, sort=False, na_sentinel=-1):

Diff for: pandas/core/indexes/multi.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -465,9 +465,13 @@ def _nbytes(self, deep=False):
465465
*this is an internal routine*
466466
467467
"""
468+
469+
# for implementations with no useful getsizeof (PyPy)
470+
objsize = 24
471+
468472
level_nbytes = sum((i.memory_usage(deep=deep) for i in self.levels))
469473
label_nbytes = sum((i.nbytes for i in self.labels))
470-
names_nbytes = sum((getsizeof(i) for i in self.names))
474+
names_nbytes = sum((getsizeof(i, objsize) for i in self.names))
471475
result = level_nbytes + label_nbytes + names_nbytes
472476

473477
# include our engine hashtable

Diff for: pandas/core/indexes/range.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -194,8 +194,12 @@ def _format_data(self):
194194

195195
@cache_readonly
196196
def nbytes(self):
197-
""" return the number of bytes in the underlying data """
198-
return sum([getsizeof(getattr(self, v)) for v in
197+
"""
198+
Return the number of bytes in the underlying data
199+
On implementations where this is undetermined (PyPy)
200+
assume 24 bytes for each value
201+
"""
202+
return sum([getsizeof(getattr(self, v), 24) for v in
199203
['_start', '_stop', '_step']])
200204

201205
def memory_usage(self, deep=False):

Diff for: pandas/tests/frame/test_repr_info.py

+49-19
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
import pytest
1212

1313
from pandas import (DataFrame, compat, option_context)
14-
from pandas.compat import StringIO, lrange, u
14+
from pandas.compat import StringIO, lrange, u, PYPY
1515
import pandas.io.formats.format as fmt
1616
import pandas as pd
1717

@@ -323,23 +323,6 @@ def test_info_memory_usage(self):
323323
# excluded column with object dtype, so estimate is accurate
324324
assert not re.match(r"memory usage: [^+]+\+", res[-1])
325325

326-
df_with_object_index = pd.DataFrame({'a': [1]}, index=['foo'])
327-
df_with_object_index.info(buf=buf, memory_usage=True)
328-
res = buf.getvalue().splitlines()
329-
assert re.match(r"memory usage: [^+]+\+", res[-1])
330-
331-
df_with_object_index.info(buf=buf, memory_usage='deep')
332-
res = buf.getvalue().splitlines()
333-
assert re.match(r"memory usage: [^+]+$", res[-1])
334-
335-
assert (df_with_object_index.memory_usage(
336-
index=True, deep=True).sum() > df_with_object_index.memory_usage(
337-
index=True).sum())
338-
339-
df_object = pd.DataFrame({'a': ['a']})
340-
assert (df_object.memory_usage(deep=True).sum() >
341-
df_object.memory_usage().sum())
342-
343326
# Test a DataFrame with duplicate columns
344327
dtypes = ['int64', 'int64', 'int64', 'float64']
345328
data = {}
@@ -349,6 +332,15 @@ def test_info_memory_usage(self):
349332
df = DataFrame(data)
350333
df.columns = dtypes
351334

335+
df_with_object_index = pd.DataFrame({'a': [1]}, index=['foo'])
336+
df_with_object_index.info(buf=buf, memory_usage=True)
337+
res = buf.getvalue().splitlines()
338+
assert re.match(r"memory usage: [^+]+\+", res[-1])
339+
340+
df_with_object_index.info(buf=buf, memory_usage='deep')
341+
res = buf.getvalue().splitlines()
342+
assert re.match(r"memory usage: [^+]+$", res[-1])
343+
352344
# Ensure df size is as expected
353345
# (cols * rows * bytes) + index size
354346
df_size = df.memory_usage().sum()
@@ -377,9 +369,47 @@ def test_info_memory_usage(self):
377369
df.memory_usage(index=True)
378370
df.index.values.nbytes
379371

372+
mem = df.memory_usage(deep=True).sum()
373+
374+
@pytest.mark.skipif(PYPY, reason="on PyPy deep=True does not change result")
375+
def test_info_memory_usage_deep_not_pypy(self):
376+
buf = StringIO()
377+
df_with_object_index = pd.DataFrame({'a': [1]}, index=['foo'])
378+
df_with_object_index.info(buf=buf, memory_usage=True)
379+
assert (df_with_object_index.memory_usage(
380+
index=True, deep=True).sum() >
381+
df_with_object_index.memory_usage(
382+
index=True).sum())
383+
384+
df_object = pd.DataFrame({'a': ['a']})
385+
assert (df_object.memory_usage(deep=True).sum() >
386+
df_object.memory_usage().sum())
387+
388+
@pytest.mark.skipif(not PYPY, reason="on PyPy deep=True does not change result")
389+
def test_info_memory_usage_deep_pypy(self):
390+
buf = StringIO()
391+
df_with_object_index = pd.DataFrame({'a': [1]}, index=['foo'])
392+
assert (df_with_object_index.memory_usage(
393+
index=True, deep=True).sum() ==
394+
df_with_object_index.memory_usage(
395+
index=True).sum())
396+
397+
df_object = pd.DataFrame({'a': ['a']})
398+
assert (df_object.memory_usage(deep=True).sum() ==
399+
df_object.memory_usage().sum())
400+
401+
@pytest.mark.skipif(PYPY, reason="PyPy getsizeof() fails by design")
402+
def test_usage_via_getsizeof(self):
403+
df = DataFrame(
404+
data=1,
405+
index=pd.MultiIndex.from_product(
406+
[['a'], range(1000)]),
407+
columns=['A']
408+
)
409+
mem = df.memory_usage(deep=True).sum()
380410
# sys.getsizeof will call the .memory_usage with
381411
# deep=True, and add on some GC overhead
382-
diff = df.memory_usage(deep=True).sum() - sys.getsizeof(df)
412+
diff = mem - sys.getsizeof(df)
383413
assert abs(diff) < 100
384414

385415
def test_info_memory_usage_qualified(self):

Diff for: pandas/tests/test_base.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
import pandas.util.testing as tm
1616
from pandas import (Series, Index, DatetimeIndex, TimedeltaIndex, PeriodIndex,
1717
Timedelta, IntervalIndex, Interval)
18-
from pandas.compat import StringIO
18+
from pandas.compat import StringIO, PYPY
1919
from pandas.compat.numpy import np_array_datetime64_compat
2020
from pandas.core.base import PandasDelegate, NoNewAttributesMixin
2121
from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin
@@ -144,6 +144,7 @@ def f():
144144

145145
pytest.raises(TypeError, f)
146146

147+
@pytest.mark.skipif(PYPY, reason="not relevant for PyPy")
147148
def test_memory_usage(self):
148149
# Delegate does not implement memory_usage.
149150
# Check that we fall back to in-built `__sizeof__`
@@ -941,6 +942,7 @@ def test_fillna(self):
941942
# check shallow_copied
942943
assert o is not result
943944

945+
@pytest.mark.skipif(PYPY, reason="not relevant for PyPy")
944946
def test_memory_usage(self):
945947
for o in self.objs:
946948
res = o.memory_usage()

Diff for: pandas/tests/test_categorical.py

+6-5
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
period_range, PeriodIndex,
2525
timedelta_range, TimedeltaIndex, NaT,
2626
Interval, IntervalIndex)
27-
from pandas.compat import range, lrange, u, PY3
27+
from pandas.compat import range, lrange, u, PY3, PYPY
2828
from pandas.core.config import option_context
2929

3030

@@ -1448,10 +1448,11 @@ def test_memory_usage(self):
14481448
cat = pd.Categorical(['foo', 'foo', 'bar'])
14491449
assert cat.memory_usage(deep=True) > cat.nbytes
14501450

1451-
# sys.getsizeof will call the .memory_usage with
1452-
# deep=True, and add on some GC overhead
1453-
diff = cat.memory_usage(deep=True) - sys.getsizeof(cat)
1454-
assert abs(diff) < 100
1451+
if not PYPY:
1452+
# sys.getsizeof will call the .memory_usage with
1453+
# deep=True, and add on some GC overhead
1454+
diff = cat.memory_usage(deep=True) - sys.getsizeof(cat)
1455+
assert abs(diff) < 100
14551456

14561457
def test_searchsorted(self):
14571458
# https://github.com/pandas-dev/pandas/issues/8420

Diff for: pandas/util/testing.py

-1
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,6 @@
5656
K = 4
5757
_RAISE_NETWORK_ERROR_DEFAULT = False
5858

59-
6059
# set testing_mode
6160
_testing_mode_warnings = (DeprecationWarning, compat.ResourceWarning)
6261

0 commit comments

Comments
 (0)