Skip to content

Commit d5c9a22

Browse files
committed
Merge pull request #8227 from jreback/index_api
API: raise on setops for + and - for Indexes (GH8226)
2 parents 54678dd + 07a46af commit d5c9a22

23 files changed

+110
-98
lines changed

doc/source/indexing.rst

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1616,28 +1616,33 @@ display:
16161616
df
16171617
df['A']
16181618
1619+
.. _indexing.setops:
16191620
16201621
Set operations on Index objects
16211622
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
16221623
1624+
.. warning::
1625+
1626+
In 0.15.0, the set operations ``+`` and ``-`` were deprecated in order to provide these for numeric type operations on certain
1627+
index types. ``+`` can be replaced by ``.union()`` or ``|``, and ``-`` by ``.difference()``.
1628+
16231629
.. _indexing.set_ops:
16241630
1625-
The three main operations are ``union (|)``, ``intersection (&)``, and ``diff
1626-
(-)``. These can be directly called as instance methods or used via overloaded
1627-
operators:
1631+
The two main operations are ``union (|)`` and ``intersection (&)``.
1632+
These can be directly called as instance methods or used via overloaded
1633+
operators. Difference is provided via the ``.difference()`` method.
16281634
16291635
.. ipython:: python
16301636
16311637
a = Index(['c', 'b', 'a'])
16321638
b = Index(['c', 'e', 'd'])
1633-
a.union(b)
16341639
a | b
16351640
a & b
1636-
a - b
1641+
a.difference(b)
16371642
16381643
Also available is the ``sym_diff (^)`` operation, which returns elements
16391644
that appear in either ``idx1`` or ``idx2`` but not both. This is
1640-
equivalent to the Index created by ``(idx1 - idx2) + (idx2 - idx1)``,
1645+
equivalent to the Index created by ``(idx1.difference(idx2)).union(idx2.difference(idx1))``,
16411646
with duplicates dropped.
16421647

16431648
.. ipython:: python

doc/source/v0.15.0.txt

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ users upgrade to this version.
1919
- Internal refactoring of the ``Index`` class to no longer sub-class ``ndarray``, see :ref:`Internal Refactoring <whatsnew_0150.refactoring>`
2020
- New datetimelike properties accessor ``.dt`` for Series, see :ref:`Datetimelike Properties <whatsnew_0150.dt>`
2121
- dropping support for ``PyTables`` less than version 3.0.0, and ``numexpr`` less than version 2.1 (:issue:`7990`)
22+
- API change in using ``Index`` set operations, see :ref:`here <whatsnew_0150.index_set_ops>`
2223

2324
- :ref:`Other Enhancements <whatsnew_0150.enhancements>`
2425

@@ -343,6 +344,11 @@ API changes
343344
- ``Series.to_csv()`` now returns a string when ``path=None``, matching the behaviour of
344345
``DataFrame.to_csv()`` (:issue:`8215`).
345346

347+
348+
.. _whatsnew_0150.index_set_ops:
349+
350+
- The Index set operations ``+`` and ``-`` were deprecated in order to provide these for numeric type operations on certain index types. ``+`` can be replaced by ``.union()`` or ``|``, and ``-`` by ``.difference()``. Further, the method name ``Index.diff()`` is deprecated and can be replaced by ``Index.difference()``.
351+
346352
.. _whatsnew_0150.dt:
347353

348354
.dt accessor

pandas/core/base.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -711,8 +711,10 @@ def __add__(self, other):
711711
from pandas.core.index import Index
712712
from pandas.tseries.offsets import DateOffset
713713
if isinstance(other, Index):
714+
warnings.warn("using '+' to provide set union with Indexes is deprecated, "
715+
"use .union()",FutureWarning)
714716
return self.union(other)
715-
elif isinstance(other, (DateOffset, datetime.timedelta, np.timedelta64)):
717+
if isinstance(other, (DateOffset, datetime.timedelta, np.timedelta64)):
716718
return self._add_delta(other)
717719
elif com.is_integer(other):
718720
return self.shift(other)
@@ -723,8 +725,10 @@ def __sub__(self, other):
723725
from pandas.core.index import Index
724726
from pandas.tseries.offsets import DateOffset
725727
if isinstance(other, Index):
726-
return self.diff(other)
727-
elif isinstance(other, (DateOffset, datetime.timedelta, np.timedelta64)):
728+
warnings.warn("using '-' to provide set differences with Indexes is deprecated, "
729+
"use .difference()",FutureWarning)
730+
return self.difference(other)
731+
if isinstance(other, (DateOffset, datetime.timedelta, np.timedelta64)):
728732
return self._add_delta(-other)
729733
elif com.is_integer(other):
730734
return self.shift(-other)

pandas/core/categorical.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -435,7 +435,7 @@ def reorder_levels(self, new_levels, ordered=None):
435435
"""
436436
new_levels = self._validate_levels(new_levels)
437437

438-
if len(new_levels) < len(self._levels) or len(self._levels-new_levels):
438+
if len(new_levels) < len(self._levels) or len(self._levels.difference(new_levels)):
439439
raise ValueError('Reordered levels must include all original levels')
440440
values = self.__array__()
441441
self._codes = _get_codes_for_values(values, new_levels)
@@ -887,7 +887,7 @@ def __setitem__(self, key, value):
887887
raise ValueError("cannot set a Categorical with another, without identical levels")
888888

889889
rvalue = value if com.is_list_like(value) else [value]
890-
to_add = Index(rvalue)-self.levels
890+
to_add = Index(rvalue).difference(self.levels)
891891
# no assignments of values not in levels, but it's always ok to set something to np.nan
892892
if len(to_add) and not isnull(to_add).all():
893893
raise ValueError("cannot setitem on a Categorical with a new level,"

pandas/core/frame.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3682,7 +3682,7 @@ def append(self, other, ignore_index=False, verify_integrity=False):
36823682
'ignore_index=True')
36833683

36843684
index = None if other.name is None else [other.name]
3685-
combined_columns = self.columns.tolist() + ((self.columns | other.index) - self.columns).tolist()
3685+
combined_columns = self.columns.tolist() + (self.columns | other.index).difference(self.columns).tolist()
36863686
other = other.reindex(combined_columns, copy=False)
36873687
other = DataFrame(other.values.reshape((1, len(other))),
36883688
index=index, columns=combined_columns).convert_objects()

pandas/core/groupby.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -473,7 +473,7 @@ def _set_selection_from_grouper(self):
473473
ax = self.obj._info_axis
474474
groupers = [ g.name for g in grp.groupings if g.level is None and g.name is not None and g.name in ax ]
475475
if len(groupers):
476-
self._group_selection = (ax-Index(groupers)).tolist()
476+
self._group_selection = ax.difference(Index(groupers)).tolist()
477477

478478
def _set_result_index_ordered(self, result):
479479
# set the result index on the passed values object

pandas/core/index.py

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1128,9 +1128,10 @@ def argsort(self, *args, **kwargs):
11281128

11291129
def __add__(self, other):
11301130
if isinstance(other, Index):
1131+
warnings.warn("using '+' to provide set union with Indexes is deprecated, "
1132+
"use '|' or .union()",FutureWarning)
11311133
return self.union(other)
1132-
else:
1133-
return Index(np.array(self) + other)
1134+
return Index(np.array(self) + other)
11341135

11351136
__iadd__ = __add__
11361137
__eq__ = _indexOp('__eq__')
@@ -1141,7 +1142,10 @@ def __add__(self, other):
11411142
__ge__ = _indexOp('__ge__')
11421143

11431144
def __sub__(self, other):
1144-
return self.diff(other)
1145+
if isinstance(other, Index):
1146+
warnings.warn("using '-' to provide set differences with Indexes is deprecated, "
1147+
"use .difference()",FutureWarning)
1148+
return self.difference(other)
11451149

11461150
def __and__(self, other):
11471151
return self.intersection(other)
@@ -1273,7 +1277,7 @@ def intersection(self, other):
12731277
taken.name = None
12741278
return taken
12751279

1276-
def diff(self, other):
1280+
def difference(self, other):
12771281
"""
12781282
Compute sorted set difference of two Index objects
12791283
@@ -1289,8 +1293,7 @@ def diff(self, other):
12891293
-----
12901294
One can do either of these and achieve the same result
12911295
1292-
>>> index - index2
1293-
>>> index.diff(index2)
1296+
>>> index.difference(index2)
12941297
"""
12951298

12961299
if not hasattr(other, '__iter__'):
@@ -1308,6 +1311,8 @@ def diff(self, other):
13081311
theDiff = sorted(set(self) - set(other))
13091312
return Index(theDiff, name=result_name)
13101313

1314+
diff = deprecate('diff',difference)
1315+
13111316
def sym_diff(self, other, result_name=None):
13121317
"""
13131318
Compute the sorted symmetric difference of two Index objects.
@@ -1350,7 +1355,7 @@ def sym_diff(self, other, result_name=None):
13501355
other = Index(other)
13511356
result_name = result_name or self.name
13521357

1353-
the_diff = sorted(set((self - other) + (other - self)))
1358+
the_diff = sorted(set((self.difference(other)).union(other.difference(self))))
13541359
return Index(the_diff, name=result_name)
13551360

13561361
def get_loc(self, key):
@@ -4135,6 +4140,8 @@ def union(self, other):
41354140
Returns
41364141
-------
41374142
Index
4143+
4144+
>>> index.union(index2)
41384145
"""
41394146
self._assert_can_do_setop(other)
41404147

@@ -4177,7 +4184,7 @@ def intersection(self, other):
41774184
return MultiIndex.from_arrays(lzip(*uniq_tuples), sortorder=0,
41784185
names=result_names)
41794186

4180-
def diff(self, other):
4187+
def difference(self, other):
41814188
"""
41824189
Compute sorted set difference of two MultiIndex objects
41834190

pandas/core/panel.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -678,9 +678,9 @@ def _combine_frame(self, other, func, axis=0):
678678
self.minor_axis)
679679

680680
def _combine_panel(self, other, func):
681-
items = self.items + other.items
682-
major = self.major_axis + other.major_axis
683-
minor = self.minor_axis + other.minor_axis
681+
items = self.items.union(other.items)
682+
major = self.major_axis.union(other.major_axis)
683+
minor = self.minor_axis.union(other.minor_axis)
684684

685685
# could check that everything's the same size, but forget it
686686
this = self.reindex(items=items, major=major, minor=minor)

pandas/core/panelnd.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ def _combine_with_constructor(self, other, func):
8282
# combine labels to form new axes
8383
new_axes = []
8484
for a in self._AXIS_ORDERS:
85-
new_axes.append(getattr(self, a) + getattr(other, a))
85+
new_axes.append(getattr(self, a).union(getattr(other, a)))
8686

8787
# reindex: could check that everything's the same size, but forget it
8888
d = dict([(a, ax) for a, ax in zip(self._AXIS_ORDERS, new_axes)])

pandas/core/reshape.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -848,7 +848,7 @@ def lreshape(data, groups, dropna=True, label=None):
848848
keys, values = zip(*groups)
849849

850850
all_cols = list(set.union(*[set(x) for x in values]))
851-
id_cols = list(data.columns.diff(all_cols))
851+
id_cols = list(data.columns.difference(all_cols))
852852

853853
K = len(values[0])
854854

0 commit comments

Comments
 (0)