Skip to content

Commit 284649a

Browse files
committed
API: add "level=" argument to MultiIndex.unique()
closes pandas-dev#17896
1 parent 8dac633 commit 284649a

File tree

5 files changed

+95
-15
lines changed

5 files changed

+95
-15
lines changed

doc/source/whatsnew/v0.22.0.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@ Indexing
102102
^^^^^^^^
103103

104104
- Bug in :func:`PeriodIndex.truncate` which raises ``TypeError`` when ``PeriodIndex`` is monotonic (:issue:`17717`)
105-
-
105+
- :func:`MultiIndex.unique` now supports the ``level=`` argument (:issue:`17896`)
106106
-
107107

108108
I/O

pandas/core/indexes/base.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -3758,7 +3758,9 @@ def drop(self, labels, errors='raise'):
37583758
return self.delete(indexer)
37593759

37603760
@Appender(base._shared_docs['unique'] % _index_doc_kwargs)
3761-
def unique(self):
3761+
def unique(self, level=None):
3762+
if level not in {0, self.name, None}:
3763+
raise ValueError("Level {} not found".format(level))
37623764
result = super(Index, self).unique()
37633765
return self._shallow_copy(result)
37643766

pandas/core/indexes/multi.py

+40-5
Original file line numberDiff line numberDiff line change
@@ -886,7 +886,7 @@ def _try_mi(k):
886886

887887
raise InvalidIndexError(key)
888888

889-
def _get_level_values(self, level):
889+
def _get_level_values(self, level, unique=False):
890890
"""
891891
Return vector of label values for requested level,
892892
equal to the length of the index
@@ -896,17 +896,23 @@ def _get_level_values(self, level):
896896
Parameters
897897
----------
898898
level : int level
899+
unique : bool
900+
if True, drop duplicated values
901+
902+
.. versionadded:: 0.22.0
899903
900904
Returns
901905
-------
902906
values : ndarray
903907
"""
904908

905-
unique = self.levels[level]
909+
values = self.levels[level]
906910
labels = self.labels[level]
907-
filled = algos.take_1d(unique._values, labels,
908-
fill_value=unique._na_value)
909-
values = unique._shallow_copy(filled)
911+
if unique:
912+
labels = algos.unique(labels)
913+
filled = algos.take_1d(values._values, labels,
914+
fill_value=values._na_value)
915+
values = values._shallow_copy(filled)
910916
return values
911917

912918
def get_level_values(self, level):
@@ -945,6 +951,35 @@ def get_level_values(self, level):
945951
values = self._get_level_values(level)
946952
return values
947953

954+
def unique(self, level=None):
955+
"""
956+
Return unique values in the index. Uniques are returned in order
957+
of appearance; this does NOT sort.
958+
959+
Parameters
960+
----------
961+
level : int or str, optional, default None
962+
only return values from specified level
963+
964+
.. versionadded:: 0.22.0
965+
966+
Returns
967+
-------
968+
Index without duplicates (MultiIndex as long as level=None)
969+
970+
See Also
971+
--------
972+
unique
973+
Index.unique
974+
Series.unique
975+
"""
976+
977+
if level is None:
978+
return super(MultiIndex, self).unique()
979+
else:
980+
level = self._get_level_number(level)
981+
return self._get_level_values(level=level, unique=True)
982+
948983
def format(self, space=2, sparsify=None, adjoin=True, names=False,
949984
na_rep=None, formatter=None):
950985
if len(self) == 0:

pandas/tests/indexes/common.py

+19
Original file line numberDiff line numberDiff line change
@@ -329,6 +329,25 @@ def test_duplicates(self, indices):
329329
assert not idx.is_unique
330330
assert idx.has_duplicates
331331

332+
def test_unique(self):
333+
# GH 17896
334+
idx = pd.Index([2, 3, 2, 1], name='my_index')
335+
expected = pd.Index([2, 3, 1], name='my_index')
336+
for level in 0, 'my_index', None:
337+
result = idx.unique(level=level)
338+
tm.assert_index_equal(result, expected)
339+
340+
for level in 3, 'wrong':
341+
msg = "Level {} not found".format(level)
342+
with tm.assert_raises_regex(ValueError, msg):
343+
idx.unique(level=level)
344+
345+
# with NaNs
346+
idx = pd.Index([2, np.nan, 2, 1], name='my_index')
347+
expected = pd.Index([2, np.nan, 1], name='my_index')
348+
result = idx.unique()
349+
tm.assert_index_equal(result, expected)
350+
332351
def test_get_unique_index(self, indices):
333352
# MultiIndex tested separately
334353
if not len(indices) or isinstance(indices, MultiIndex):

pandas/tests/indexes/test_multi.py

+32-8
Original file line numberDiff line numberDiff line change
@@ -955,19 +955,21 @@ def test_get_level_values(self):
955955
exp = CategoricalIndex([1, 2, 3, 1, 2, 3])
956956
tm.assert_index_equal(index.get_level_values(1), exp)
957957

958-
def test_get_level_values_na(self):
958+
@pytest.mark.xfail(reason='GH 17924 (returns Int64Index with float data)')
959+
def test_get_level_values_int_with_na(self):
959960
arrays = [['a', 'b', 'b'], [1, np.nan, 2]]
960961
index = pd.MultiIndex.from_arrays(arrays)
961-
values = index.get_level_values(1)
962-
expected = np.array([1, np.nan, 2])
963-
tm.assert_numpy_array_equal(values.values.astype(float), expected)
962+
result = index.get_level_values(1)
963+
expected = Index([1, np.nan, 2])
964+
tm.assert_index_equal(result, expected)
964965

965966
arrays = [['a', 'b', 'b'], [np.nan, np.nan, 2]]
966967
index = pd.MultiIndex.from_arrays(arrays)
967-
values = index.get_level_values(1)
968-
expected = np.array([np.nan, np.nan, 2])
969-
tm.assert_numpy_array_equal(values.values.astype(float), expected)
968+
result = index.get_level_values(1)
969+
expected = Index([np.nan, np.nan, 2])
970+
tm.assert_index_equal(result, expected)
970971

972+
def test_get_level_values_na(self):
971973
arrays = [[np.nan, np.nan, np.nan], ['a', np.nan, 1]]
972974
index = pd.MultiIndex.from_arrays(arrays)
973975
result = index.get_level_values(0)
@@ -982,7 +984,7 @@ def test_get_level_values_na(self):
982984
index = pd.MultiIndex.from_arrays(arrays)
983985
values = index.get_level_values(1)
984986
expected = pd.DatetimeIndex([0, 1, pd.NaT])
985-
tm.assert_numpy_array_equal(values.values, expected.values)
987+
tm.assert_index_equal(values, expected)
986988

987989
arrays = [[], []]
988990
index = pd.MultiIndex.from_arrays(arrays)
@@ -2269,6 +2271,28 @@ def test_unique(self):
22692271
exp = pd.MultiIndex.from_arrays([['a'], ['a']])
22702272
tm.assert_index_equal(res, exp)
22712273

2274+
# GH #17896 - with level= argument
2275+
for level in 0, 'first':
2276+
result = self.index.unique(level=level)
2277+
expected = Index(['foo', 'bar', 'baz', 'qux'],
2278+
name='first')
2279+
tm.assert_index_equal(result, expected)
2280+
2281+
# With already unique level
2282+
mi = pd.MultiIndex.from_arrays([[1, 3, 2, 4], [1, 1, 1, 2]],
2283+
names=['first', 'second'])
2284+
result = mi.unique(level=level)
2285+
expected = mi.get_level_values(level)
2286+
tm.assert_index_equal(result, expected)
2287+
2288+
@pytest.mark.xfail(reason='GH 17924 (returns Int64Index with float data)')
2289+
def test_unique_with_nans(self):
2290+
# with NaNs
2291+
index = pd.MultiIndex.from_arrays([['a', 'b', 'b'], [2, np.nan, 2]])
2292+
result = index.unique(level=1)
2293+
expected = pd.Index([2, np.nan])
2294+
tm.assert_index_equal(result, expected)
2295+
22722296
def test_unique_datetimelike(self):
22732297
idx1 = pd.DatetimeIndex(['2015-01-01', '2015-01-01', '2015-01-01',
22742298
'2015-01-01', 'NaT', 'NaT'])

0 commit comments

Comments
 (0)