Skip to content

Commit 9d8dbef

Browse files
databasedav authored and jreback committed
BUG: x in MultiIndex.drop(x) (#19054)
1 parent 64c1920 commit 9d8dbef

File tree

5 files changed

+36
-8
lines changed

5 files changed

+36
-8
lines changed

doc/source/whatsnew/v0.23.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -398,6 +398,7 @@ Indexing
398398
- Bug where indexing a non-scalar value from a ``Series`` with a non-unique ``Index`` returned the value flattened (:issue:`17610`)
399399
- Bug in :func:`DatetimeIndex.insert` where inserting ``NaT`` into a timezone-aware index incorrectly raised (:issue:`16357`)
400400
- Bug in ``__setitem__`` when indexing a :class:`DataFrame` with a 2-d boolean ndarray (:issue:`18582`)
401+
- Bug in :func:`MultiIndex.__contains__` where non-tuple keys would return ``True`` even if they had been dropped (:issue:`19027`)
401402
- Bug in :func:`MultiIndex.set_labels` which would cause casting (and potentially clipping) of the new labels if the ``level`` argument is not 0 or a list like [0, 1, ... ] (:issue:`19057`)
402403
- Bug in ``str.extractall`` where, when there were no matches, an empty :class:`Index` was returned instead of the appropriate :class:`MultiIndex` (:issue:`19034`)
403404

pandas/core/indexes/multi.py

+5
Original file line numberDiff line numberDiff line change
@@ -2123,6 +2123,11 @@ def _maybe_to_slice(loc):
21232123

21242124
if not isinstance(key, tuple):
21252125
loc = self._get_level_indexer(key, level=0)
2126+
2127+
# _get_level_indexer returns an empty slice if the key has
2128+
# been dropped from the MultiIndex
2129+
if isinstance(loc, slice) and loc.start == loc.stop:
2130+
raise KeyError(key)
21262131
return _maybe_to_slice(loc)
21272132

21282133
keylen = len(key)

pandas/core/reshape/reshape.py

+6-5
Original file line numberDiff line numberDiff line change
@@ -628,7 +628,11 @@ def _convert_level_number(level_num, columns):
628628
levsize = len(level_labels)
629629
drop_cols = []
630630
for key in unique_groups:
631-
loc = this.columns.get_loc(key)
631+
try:
632+
loc = this.columns.get_loc(key)
633+
except KeyError:
634+
drop_cols.append(key)
635+
continue
632636

633637
# can make more efficient?
634638
# we almost always return a slice
@@ -639,10 +643,7 @@ def _convert_level_number(level_num, columns):
639643
else:
640644
slice_len = loc.stop - loc.start
641645

642-
if slice_len == 0:
643-
drop_cols.append(key)
644-
continue
645-
elif slice_len != levsize:
646+
if slice_len != levsize:
646647
chunk = this.loc[:, this.columns[loc]]
647648
chunk.columns = level_vals.take(chunk.columns.labels[-1])
648649
value_slice = chunk.reindex(columns=level_vals_used).values

pandas/tests/frame/test_mutate_columns.py

+4-3
Original file line numberDiff line numberDiff line change
@@ -193,9 +193,10 @@ def test_delitem_multiindex(self):
193193
with pytest.raises(KeyError):
194194
del df[('A',)]
195195

196-
# xref: https://github.com./pandas-dev/pandas/issues/2770
197-
# the 'A' is STILL in the columns!
198-
assert 'A' in df.columns
196+
# behavior of dropped/deleted MultiIndex levels changed from
197+
# GH 2770 to GH 19027: MultiIndex no longer '.__contains__'
198+
# levels which are dropped/deleted
199+
assert 'A' not in df.columns
199200
with pytest.raises(KeyError):
200201
del df['A']
201202

pandas/tests/indexing/test_multiindex.py

+20
Original file line numberDiff line numberDiff line change
@@ -705,6 +705,26 @@ def test_multiindex_symmetric_difference(self):
705705
result = idx ^ idx2
706706
assert result.names == [None, None]
707707

708+
def test_multiindex_contains_dropped(self):
709+
# GH 19027
710+
# test that a key dropped from a MultiIndex is no longer reported as
711+
# contained in it, even though the key remains in the MultiIndex's levels
712+
idx = MultiIndex.from_product([[1, 2], [3, 4]])
713+
assert 2 in idx
714+
idx = idx.drop(2)
715+
716+
# after the drop, 2 is still present in the first level's values
717+
assert 2 in idx.levels[0]
718+
# but a membership test on the index itself must now be False
719+
assert 2 not in idx
720+
721+
# the same holds for string keys
722+
idx = MultiIndex.from_product([['a', 'b'], ['c', 'd']])
723+
assert 'a' in idx
724+
idx = idx.drop('a')
725+
assert 'a' in idx.levels[0]
726+
assert 'a' not in idx
727+
708728

709729
class TestMultiIndexSlicers(object):
710730

0 commit comments

Comments
 (0)