Skip to content

Commit fb6116f

Browse files
toobaz authored and jreback committed
BUG: raise KeyError if MultiIndex.get_loc_level is asked unused label (#22230)
* BUG: raise KeyError if MultiIndex.get_loc_level is asked unused label (closes #22221)
* TST: test groupby.apply() with user-defined function returning an empty chunk
* CLN: remove named lambda
1 parent 91686e6 commit fb6116f

File tree

4 files changed

+34
-18
lines changed

4 files changed

+34
-18
lines changed

doc/source/whatsnew/v0.24.0.txt

+2-2
Original file line numberDiff line numberDiff line change
@@ -622,8 +622,8 @@ Missing
622622
MultiIndex
623623
^^^^^^^^^^
624624

625-
- Removed compatibility for MultiIndex pickles prior to version 0.8.0; compatibility with MultiIndex pickles from version 0.13 forward is maintained (:issue:`21654`)
626-
-
625+
- Removed compatibility for :class:`MultiIndex` pickles prior to version 0.8.0; compatibility with :class:`MultiIndex` pickles from version 0.13 forward is maintained (:issue:`21654`)
626+
- :meth:`MultiIndex.get_loc_level` (and as a consequence, ``.loc`` on a :class:`MultiIndex`-indexed object) will now raise a ``KeyError``, rather than returning an empty ``slice``, if asked for a label which is present in the ``levels`` but is unused (:issue:`22221`)
627627
-
628628

629629
I/O

pandas/core/indexes/multi.py

+14-12
Original file line numberDiff line numberDiff line change
@@ -2185,11 +2185,6 @@ def _maybe_to_slice(loc):
21852185

21862186
if not isinstance(key, tuple):
21872187
loc = self._get_level_indexer(key, level=0)
2188-
2189-
# _get_level_indexer returns an empty slice if the key has
2190-
# been dropped from the MultiIndex
2191-
if isinstance(loc, slice) and loc.start == loc.stop:
2192-
raise KeyError(key)
21932188
return _maybe_to_slice(loc)
21942189

21952190
keylen = len(key)
@@ -2443,14 +2438,21 @@ def convert_indexer(start, stop, step, indexer=indexer, labels=labels):
24432438

24442439
else:
24452440

2446-
loc = level_index.get_loc(key)
2447-
if isinstance(loc, slice):
2448-
return loc
2449-
elif level > 0 or self.lexsort_depth == 0:
2450-
return np.array(labels == loc, dtype=bool)
2441+
code = level_index.get_loc(key)
2442+
2443+
if level > 0 or self.lexsort_depth == 0:
2444+
# Desired level is not sorted
2445+
locs = np.array(labels == code, dtype=bool, copy=False)
2446+
if not locs.any():
2447+
# The label is present in self.levels[level] but unused:
2448+
raise KeyError(key)
2449+
return locs
24512450

2452-
i = labels.searchsorted(loc, side='left')
2453-
j = labels.searchsorted(loc, side='right')
2451+
i = labels.searchsorted(code, side='left')
2452+
j = labels.searchsorted(code, side='right')
2453+
if i == j:
2454+
# The label is present in self.levels[level] but unused:
2455+
raise KeyError(key)
24542456
return slice(i, j)
24552457

24562458
def get_locs(self, seq):

pandas/tests/groupby/test_apply.py

+14-4
Original file line numberDiff line numberDiff line change
@@ -271,10 +271,7 @@ def test_apply_chunk_view():
271271
df = DataFrame({'key': [1, 1, 1, 2, 2, 2, 3, 3, 3],
272272
'value': compat.lrange(9)})
273273

274-
# return view
275-
f = lambda x: x[:2]
276-
277-
result = df.groupby('key', group_keys=False).apply(f)
274+
result = df.groupby('key', group_keys=False).apply(lambda x: x[:2])
278275
expected = df.take([0, 1, 3, 4, 6, 7])
279276
tm.assert_frame_equal(result, expected)
280277

@@ -518,6 +515,19 @@ def test_func(x):
518515
tm.assert_frame_equal(result2, expected2)
519516

520517

518+
def test_groupby_apply_return_empty_chunk():
519+
# GH 22221: apply filter which returns some empty groups
520+
df = pd.DataFrame(dict(value=[0, 1], group=['filled', 'empty']))
521+
groups = df.groupby('group')
522+
result = groups.apply(lambda group: group[group.value != 1]['value'])
523+
expected = pd.Series([0], name='value',
524+
index=MultiIndex.from_product([['empty', 'filled'],
525+
[0]],
526+
names=['group', None]
527+
).drop('empty'))
528+
tm.assert_series_equal(result, expected)
529+
530+
521531
def test_apply_with_mixed_types():
522532
# gh-20949
523533
df = pd.DataFrame({'A': 'a a b'.split(), 'B': [1, 2, 3], 'C': [4, 6, 5]})

pandas/tests/indexes/multi/test_indexing.py

+4
Original file line numberDiff line numberDiff line change
@@ -271,6 +271,10 @@ def test_get_loc_level():
271271
assert new_index is None
272272

273273
pytest.raises(KeyError, index.get_loc_level, (2, 2))
274+
# GH 22221: unused label
275+
pytest.raises(KeyError, index.drop(2).get_loc_level, 2)
276+
# Unused label on unsorted level:
277+
pytest.raises(KeyError, index.drop(1, level=2).get_loc_level, 2, 2)
274278

275279
index = MultiIndex(levels=[[2000], lrange(4)], labels=[np.array(
276280
[0, 0, 0, 0]), np.array([0, 1, 2, 3])])

0 commit comments

Comments
 (0)