Skip to content

Commit 7ccce35

Browse files
committed
API: Have MultiIndex constructors return MI
This removes the special case where MultiIndex constructors returned an Index when given only a single level (``levels`` of length 1). They now return a MultiIndex with a single level. This is a backwards-incompatible change with no clear path for deprecation, so we're making a clean break. Closes #17178
1 parent 66ec5f3 commit 7ccce35

File tree

12 files changed

+127
-45
lines changed

12 files changed

+127
-45
lines changed

Diff for: doc/source/whatsnew/v0.21.0.txt

+21
Original file line numberDiff line numberDiff line change
@@ -273,6 +273,27 @@ named ``.isna()`` and ``.notna()``, these are included for classes ``Categorical
273273

274274
The configuration option ``pd.options.mode.use_inf_as_null`` is deprecated, and ``pd.options.mode.use_inf_as_na`` is added as a replacement.
275275

276+
.. _whatsnew_0210.api.multiindex_single:
277+
278+
MultiIndex Constructor with a Single Level
279+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
280+
281+
Previous versions would automatically squeeze a ``MultiIndex`` with length-one
282+
``levels`` down to an ``Index``:
283+
284+
.. code-block:: ipython
285+
286+
In [2]: pd.MultiIndex.from_tuples([('a',), ('b',)])
287+
Out[2]: Index(['a', 'b'], dtype='object')
288+
289+
Length 1 levels are no longer special-cased. They behave exactly as if you had
290+
length 2+ levels, so a :class:`MultiIndex` is always returned from all of the
291+
``MultiIndex`` constructors:
292+
293+
.. ipython:: python
294+
295+
pd.MultiIndex.from_tuples([('a',), ('b',)])
296+
276297
.. _whatsnew_0210.api:
277298

278299
Other API Changes

Diff for: pandas/core/frame.py

+6-5
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,8 @@
6767
_dict_compat,
6868
standardize_mapping)
6969
from pandas.core.generic import NDFrame, _shared_docs
70-
from pandas.core.index import Index, MultiIndex, _ensure_index
70+
from pandas.core.index import (Index, MultiIndex, _ensure_index,
71+
_index_from_sequences)
7172
from pandas.core.indexing import (maybe_droplevels, convert_to_index_sliceable,
7273
check_bool_indexer)
7374
from pandas.core.internals import (BlockManager,
@@ -1155,9 +1156,9 @@ def from_records(cls, data, index=None, exclude=None, columns=None,
11551156
else:
11561157
try:
11571158
to_remove = [arr_columns.get_loc(field) for field in index]
1158-
1159-
result_index = MultiIndex.from_arrays(
1160-
[arrays[i] for i in to_remove], names=index)
1159+
index_data = [arrays[i] for i in to_remove]
1160+
result_index = _index_from_sequences(index_data,
1161+
names=index)
11611162

11621163
exclude.update(index)
11631164
except Exception:
@@ -3000,7 +3001,7 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
30003001
to_remove.append(col)
30013002
arrays.append(level)
30023003

3003-
index = MultiIndex.from_arrays(arrays, names=names)
3004+
index = _index_from_sequences(arrays, names)
30043005

30053006
if verify_integrity and not index.is_unique:
30063007
duplicates = index.get_duplicates()

Diff for: pandas/core/indexes/api.py

+8-4
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
1-
from pandas.core.indexes.base import (Index, _new_Index, # noqa
2-
_ensure_index, _get_na_value,
3-
InvalidIndexError)
1+
from pandas.core.indexes.base import (Index,
2+
_new_Index,
3+
_ensure_index,
4+
_index_from_sequences,
5+
_get_na_value,
6+
InvalidIndexError) # noqa
47
from pandas.core.indexes.category import CategoricalIndex # noqa
58
from pandas.core.indexes.multi import MultiIndex # noqa
69
from pandas.core.indexes.interval import IntervalIndex # noqa
@@ -22,7 +25,8 @@
2225
'InvalidIndexError', 'TimedeltaIndex',
2326
'PeriodIndex', 'DatetimeIndex',
2427
'_new_Index', 'NaT',
25-
'_ensure_index', '_get_na_value', '_get_combined_index',
28+
'_ensure_index', '_index_from_sequences', '_get_na_value',
29+
'_get_combined_index',
2630
'_get_objs_combined_axis',
2731
'_get_distinct_indexes', '_union_indexes',
2832
'_get_consensus_names',

Diff for: pandas/core/indexes/base.py

+27
Original file line numberDiff line numberDiff line change
@@ -4007,6 +4007,33 @@ def invalid_op(self, other=None):
40074007
Index._add_comparison_methods()
40084008

40094009

4010+
def _index_from_sequences(sequences, names=None):
    """Build an index out of one or more sequences of data.

    Exactly one sequence produces a flat ``Index``; any other number of
    sequences is handed to ``MultiIndex.from_arrays``.

    Parameters
    ----------
    sequences : sequence of sequences
        The data for each level of the resulting index.
    names : sequence of names, optional
        One name per entry in ``sequences``. In the single-sequence case
        only ``names[0]`` is used, as the name of the ``Index``.

    Returns
    -------
    Index or MultiIndex

    Examples
    --------

    >>> _index_from_sequences([[1, 2, 3]], names=['name'])
    Int64Index([1, 2, 3], dtype='int64', name='name')

    >>> _index_from_sequences([['a', 'a'], ['a', 'b']], names=['L1', 'L2'])
    MultiIndex(levels=[['a'], ['a', 'b']],
               labels=[[0, 0], [0, 1]],
               names=['L1', 'L2'])
    """
    # NOTE(review): imported locally, presumably to avoid a circular
    # import between the base and multi modules -- confirm.
    from .multi import MultiIndex

    if len(sequences) != 1:
        return MultiIndex.from_arrays(sequences, names=names)

    # Single sequence: unwrap the (optional) list of names to one name.
    name = None if names is None else names[0]
    return Index(sequences[0], name=name)
4035+
4036+
40104037
def _ensure_index(index_like, copy=False):
40114038
if isinstance(index_like, Index):
40124039
if copy:

Diff for: pandas/core/indexes/multi.py

-10
Original file line numberDiff line numberDiff line change
@@ -91,12 +91,6 @@ def __new__(cls, levels=None, labels=None, sortorder=None, names=None,
9191
raise ValueError('Length of levels and labels must be the same.')
9292
if len(levels) == 0:
9393
raise ValueError('Must pass non-zero number of levels/labels')
94-
if len(levels) == 1:
95-
if names:
96-
name = names[0]
97-
else:
98-
name = None
99-
return Index(levels[0], name=name, copy=True).take(labels[0])
10094

10195
result = object.__new__(MultiIndex)
10296

@@ -1084,10 +1078,6 @@ def from_arrays(cls, arrays, sortorder=None, names=None):
10841078
MultiIndex.from_product : Make a MultiIndex from cartesian product
10851079
of iterables
10861080
"""
1087-
if len(arrays) == 1:
1088-
name = None if names is None else names[0]
1089-
return Index(arrays[0], name=name)
1090-
10911081
# Check if lengths of all arrays are equal or not,
10921082
# raise ValueError, if not
10931083
for i in range(1, len(arrays)):

Diff for: pandas/core/reshape/reshape.py

+12-6
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131

3232
from pandas.core.frame import _shared_docs
3333
from pandas.util._decorators import Appender
34-
from pandas.core.index import MultiIndex, _get_na_value
34+
from pandas.core.index import Index, MultiIndex, _get_na_value
3535

3636

3737
class _Unstacker(object):
@@ -311,10 +311,13 @@ def _unstack_multiple(data, clocs):
311311
recons_labels = decons_obs_group_ids(comp_ids, obs_ids, shape, clabels,
312312
xnull=False)
313313

314-
dummy_index = MultiIndex(levels=rlevels + [obs_ids],
315-
labels=rlabels + [comp_ids],
316-
names=rnames + ['__placeholder__'],
317-
verify_integrity=False)
314+
if rlocs == []:
315+
dummy_index = Index(obs_ids, name='__placeholder__')
316+
else:
317+
dummy_index = MultiIndex(levels=rlevels + [obs_ids],
318+
labels=rlabels + [comp_ids],
319+
names=rnames + ['__placeholder__'],
320+
verify_integrity=False)
318321

319322
if isinstance(data, Series):
320323
dummy = data.copy()
@@ -446,7 +449,10 @@ def _slow_pivot(index, columns, values):
446449

447450
def unstack(obj, level, fill_value=None):
448451
if isinstance(level, (tuple, list)):
449-
return _unstack_multiple(obj, level)
452+
if len(level) == 1:
453+
level = level[0]
454+
else:
455+
return _unstack_multiple(obj, level)
450456

451457
if isinstance(obj, DataFrame):
452458
if isinstance(obj.index, MultiIndex):

Diff for: pandas/core/sparse/scipy_sparse.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,11 @@ def robust_get_level_values(i):
7171
labels_to_i = Series(labels_to_i)
7272
if len(subset) > 1:
7373
labels_to_i.index = MultiIndex.from_tuples(labels_to_i.index)
74-
labels_to_i.index.names = [index.names[i] for i in subset]
74+
labels_to_i.index.names = [index.names[i] for i in subset]
75+
else:
76+
labels_to_i.index = Index(x[0] for x in labels_to_i.index)
77+
labels_to_i.index.name = index.names[subset[0]]
78+
7579
labels_to_i.name = 'value'
7680
return (labels_to_i)
7781

Diff for: pandas/core/strings.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -1452,7 +1452,10 @@ def cons_row(x):
14521452

14531453
if expand:
14541454
result = list(result)
1455-
return MultiIndex.from_tuples(result, names=name)
1455+
out = MultiIndex.from_tuples(result, names=name)
1456+
if out.nlevels == 1:
1457+
out = out.get_level_values(0)
1458+
return out
14561459
else:
14571460
return Index(result, name=name)
14581461
else:

Diff for: pandas/io/parsers.py

+15-6
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,8 @@
2323
is_scalar, is_categorical_dtype)
2424
from pandas.core.dtypes.missing import isna
2525
from pandas.core.dtypes.cast import astype_nansafe
26-
from pandas.core.index import Index, MultiIndex, RangeIndex
26+
from pandas.core.index import (Index, MultiIndex, RangeIndex,
27+
_index_from_sequences)
2728
from pandas.core.series import Series
2829
from pandas.core.frame import DataFrame
2930
from pandas.core.categorical import Categorical
@@ -1444,7 +1445,16 @@ def _agg_index(self, index, try_parse_dates=True):
14441445
arr, _ = self._infer_types(arr, col_na_values | col_na_fvalues)
14451446
arrays.append(arr)
14461447

1447-
index = MultiIndex.from_arrays(arrays, names=self.index_names)
1448+
names = self.index_names
1449+
index = _index_from_sequences(arrays, names)
1450+
if len(arrays) > 1:
1451+
index = MultiIndex.from_arrays(arrays, names=self.index_names)
1452+
else:
1453+
if self.index_names is None:
1454+
name = None
1455+
else:
1456+
name = self.index_names[0]
1457+
index = Index(arrays[0], name=name)
14481458

14491459
return index
14501460

@@ -1808,7 +1818,7 @@ def read(self, nrows=None):
18081818
try_parse_dates=True)
18091819
arrays.append(values)
18101820

1811-
index = MultiIndex.from_arrays(arrays)
1821+
index = _index_from_sequences(arrays)
18121822

18131823
if self.usecols is not None:
18141824
names = self._filter_usecols(names)
@@ -3138,9 +3148,8 @@ def _get_empty_meta(columns, index_col, index_names, dtype=None):
31383148
if index_col is None or index_col is False:
31393149
index = Index([])
31403150
else:
3141-
index = [Series([], dtype=dtype[index_name])
3142-
for index_name in index_names]
3143-
index = MultiIndex.from_arrays(index, names=index_names)
3151+
data = [Series([], dtype=dtype[name]) for name in index_names]
3152+
index = _index_from_sequences(data, names=index_names)
31443153
index_col.sort()
31453154
for i, n in enumerate(index_col):
31463155
columns.pop(n - i)

Diff for: pandas/tests/indexes/test_base.py

+17-1
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
DataFrame, Float64Index, Int64Index,
1818
CategoricalIndex, DatetimeIndex, TimedeltaIndex,
1919
PeriodIndex, isna)
20-
from pandas.core.index import _get_combined_index
20+
from pandas.core.index import _get_combined_index, _index_from_sequences
2121
from pandas.util.testing import assert_almost_equal
2222
from pandas.compat.numpy import np_datetime64_compat
2323

@@ -2103,3 +2103,19 @@ def test_intersect_str_dates(self):
21032103
res = i2.intersection(i1)
21042104

21052105
assert len(res) == 0
2106+
2107+
2108+
class TestIndexUtils(object):
    """Tests for the module-level index construction helpers."""

    @pytest.mark.parametrize('data, names, expected', [
        # A single sequence gives a flat Index, unnamed or named.
        ([[1, 2, 3]], None, Index([1, 2, 3])),
        ([[1, 2, 3]], ['name'], Index([1, 2, 3], name='name')),
        # Two sequences give a MultiIndex, unnamed or named.
        ([['a', 'a'], ['c', 'd']], None,
         MultiIndex(levels=[['a'], ['c', 'd']],
                    labels=[[0, 0], [0, 1]])),
        ([['a', 'a'], ['c', 'd']], ['L1', 'L2'],
         MultiIndex(levels=[['a'], ['c', 'd']],
                    labels=[[0, 0], [0, 1]],
                    names=['L1', 'L2'])),
    ])
    def test_index_from_sequences(self, data, names, expected):
        """``_index_from_sequences`` matches the expected index per case."""
        actual = _index_from_sequences(data, names)
        tm.assert_index_equal(actual, expected)

Diff for: pandas/tests/indexes/test_multi.py

+9-11
Original file line numberDiff line numberDiff line change
@@ -537,15 +537,12 @@ def test_astype(self):
537537
self.index.astype(np.dtype(int))
538538

539539
def test_constructor_single_level(self):
540-
single_level = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux']],
541-
labels=[[0, 1, 2, 3]], names=['first'])
542-
assert isinstance(single_level, Index)
543-
assert not isinstance(single_level, MultiIndex)
544-
assert single_level.name == 'first'
545-
546-
single_level = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux']],
547-
labels=[[0, 1, 2, 3]])
548-
assert single_level.name is None
540+
result = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux']],
541+
labels=[[0, 1, 2, 3]], names=['first'])
542+
assert isinstance(result, MultiIndex)
543+
expected = Index(['foo', 'bar', 'baz', 'qux'], name='first')
544+
tm.assert_index_equal(result.levels[0], expected)
545+
assert result.names == ['first']
549546

550547
def test_constructor_no_levels(self):
551548
tm.assert_raises_regex(ValueError, "non-zero number "
@@ -768,8 +765,9 @@ def test_from_arrays_empty(self):
768765

769766
# 1 level
770767
result = MultiIndex.from_arrays(arrays=[[]], names=['A'])
768+
assert isinstance(result, MultiIndex)
771769
expected = Index([], name='A')
772-
tm.assert_index_equal(result, expected)
770+
tm.assert_index_equal(result.levels[0], expected)
773771

774772
# N levels
775773
for N in [2, 3]:
@@ -830,7 +828,7 @@ def test_from_product_empty(self):
830828
# 1 level
831829
result = MultiIndex.from_product([[]], names=['A'])
832830
expected = pd.Index([], name='A')
833-
tm.assert_index_equal(result, expected)
831+
tm.assert_index_equal(result.levels[0], expected)
834832

835833
# 2 levels
836834
l1 = [[], ['foo', 'bar', 'baz'], []]

Diff for: pandas/util/testing.py

+3
Original file line numberDiff line numberDiff line change
@@ -1910,6 +1910,9 @@ def keyfunc(x):
19101910
# convert tuples to index
19111911
if nentries == 1:
19121912
index = Index(tuples[0], name=names[0])
1913+
elif nlevels == 1:
1914+
name = None if names is None else names[0]
1915+
index = Index((x[0] for x in tuples), name=name)
19131916
else:
19141917
index = MultiIndex.from_tuples(tuples, names=names)
19151918
return index

0 commit comments

Comments
 (0)