15
15
from pandas.compat import range, zip, lrange, lzip, u, map
16
16
from pandas import compat
17
17
from pandas.core import algorithms
18
- from pandas.core.base import PandasObject, FrozenList, FrozenNDArray, IndexOpsMixin, _shared_docs, PandasDelegate
18
+ from pandas.core.base import PandasObject, FrozenList, FrozenNDArray, IndexOpsMixin, PandasDelegate
19
+ import pandas.core.base as base
19
20
from pandas.util.decorators import (Appender, Substitution, cache_readonly,
20
21
deprecate, deprecate_kwarg)
21
22
import pandas.core.common as com
29
30
from pandas.io.common import PerformanceWarning
30
31
31
32
32
-
33
-
34
33
# simplify
35
34
default_pprint = lambda x, max_seq_items=None: com.pprint_thing(x,
36
35
escape_chars=('\t', '\r', '\n'),
45
44
46
45
_index_doc_kwargs = dict(klass='Index', inplace='',
47
46
duplicated='np.array')
47
+ _index_shared_docs = dict()
48
48
49
49
50
50
def _try_get_item(x):
@@ -108,6 +108,7 @@ class Index(IndexOpsMixin, PandasObject):
108
108
_allow_datetime_index_ops = False
109
109
_allow_period_index_ops = False
110
110
_is_numeric_dtype = False
111
+ _can_hold_na = True
111
112
112
113
_engine_type = _index.ObjectEngine
113
114
@@ -1236,6 +1237,43 @@ def take(self, indices, axis=0, allow_fill=True, fill_value=None):
1236
1237
taken = self.values.take(indices)
1237
1238
return self._shallow_copy(taken)
1238
1239
1240
@cache_readonly
def _isnan(self):
    """
    Boolean mask flagging which entries of this index are missing.

    Returns ``isnull(self)`` when the index type can hold NA values,
    otherwise an all-False boolean array of length ``len(self)``.
    """
    if not self._can_hold_na:
        # callers are expected to check ``hasnans`` beforehand, so this
        # branch should not normally be reached
        return np.zeros(len(self), dtype=np.bool_)
    return isnull(self)
1250
+
1251
@cache_readonly
def _nan_idxs(self):
    """
    Integer positions of the missing entries, taken from ``_isnan``.

    An empty int64 array is returned when the index type cannot hold NA.
    """
    if not self._can_hold_na:
        return np.array([], dtype=np.int64)
    # nonzero() yields a 1-tuple for a 1-d mask; unpack its only member
    positions, = self._isnan.nonzero()
    return positions
1258
+
1259
@cache_readonly
def hasnans(self):
    """
    Whether this index contains any missing value.

    Always False for index types that cannot hold NA, which lets callers
    skip NaN handling entirely (enables various perf speedups).
    """
    if not self._can_hold_na:
        return False
    return self._isnan.any()
1266
+
1267
+ def _convert_for_op(self, value):
1268
+ """ Convert value to be insertable to ndarray """
1269
+ return value
1270
+
1271
def _assert_can_do_op(self, value):
    """
    Validate that ``value`` is acceptable for a scalar operation.

    Raises
    ------
    TypeError
        If ``lib.isscalar(value)`` is false; the message names the type
        that was passed.
    """
    if lib.isscalar(value):
        return
    template = "'value' must be a scalar, passed: {0}"
    raise TypeError(template.format(type(value).__name__))
1276
+
1239
1277
def putmask(self, mask, value):
1240
1278
"""
1241
1279
return a new Index of the values set with the mask
@@ -1245,8 +1283,12 @@ def putmask(self, mask, value):
1245
1283
numpy.ndarray.putmask
1246
1284
"""
1247
1285
values = self.values.copy()
1248
- np.putmask(values, mask, value)
1249
- return self._shallow_copy(values)
1286
+ try:
1287
+ np.putmask(values, mask, self._convert_for_op(value))
1288
+ return self._shallow_copy(values)
1289
+ except (ValueError, TypeError):
1290
+ # coerces to object
1291
+ return self.astype(object).putmask(mask, value)
1250
1292
1251
1293
def format(self, name=False, formatter=None, **kwargs):
1252
1294
"""
@@ -2766,15 +2808,45 @@ def drop(self, labels, errors='raise'):
2766
2808
return self.delete(indexer)
2767
2809
2768
2810
@deprecate_kwarg('take_last', 'keep', mapping={True: 'last', False: 'first'})
@Appender(base._shared_docs['drop_duplicates'] % _index_doc_kwargs)
def drop_duplicates(self, keep='first'):
    # The work is done by the parent class; this override attaches the
    # Index-flavoured shared docstring and the deprecated ``take_last``
    # keyword mapping through the decorators above.
    parent = super(Index, self)
    return parent.drop_duplicates(keep=keep)
2772
2814
2773
2815
@deprecate_kwarg('take_last', 'keep', mapping={True: 'last', False: 'first'})
@Appender(base._shared_docs['duplicated'] % _index_doc_kwargs)
def duplicated(self, keep='first'):
    # The work is done by the parent class; this override attaches the
    # Index-flavoured shared docstring and the deprecated ``take_last``
    # keyword mapping through the decorators above.
    parent = super(Index, self)
    return parent.duplicated(keep=keep)
2777
2819
2820
# Shared docstring for the ``fillna`` implementations of the index classes;
# it is attached to each of them through ``Appender``.
_index_shared_docs['fillna'] = """
    Fill NA/NaN values with the specified value.

    Parameters
    ----------
    value : scalar
        Scalar value to use to fill holes (e.g. 0).
        This value cannot be list-like.
    downcast : dict, default None
        A dict of item->dtype of what to downcast if possible,
        or the string 'infer' which will try to downcast to an appropriate
        equal type (e.g. float64 to int64 if possible).

    Returns
    -------
    filled : Index
    """
2837
+
2838
@Appender(_index_shared_docs['fillna'])
def fillna(self, value=None, downcast=None):
    # NOTE: no docstring here on purpose -- ``Appender`` supplies it from
    # the shared template.

    # Only scalar fill values are accepted; raises TypeError otherwise.
    self._assert_can_do_op(value)

    if not self.hasnans:
        # nothing to fill: hand back a shallow copy of this index
        return self._shallow_copy()

    filled = self.putmask(self._isnan, value)
    # Previously a non-None ``downcast`` fell through to the shallow copy
    # above and silently returned the *unfilled* index.  The filled values
    # are now always returned.  ``downcast`` itself is not applied here;
    # the dtype is whatever the ``Index`` constructor derives from the
    # filled values.
    #
    # No need to carry metadata other than name: an index that contains
    # missing values presumably cannot have a freq -- TODO confirm for the
    # datetime-like subclasses.
    return Index(filled, name=self.name)
2849
+
2778
2850
def _evaluate_with_timedelta_like(self, other, op, opstr):
2779
2851
raise TypeError("can only perform ops with timedelta like values")
2780
2852
@@ -3200,6 +3272,16 @@ def __array__(self, dtype=None):
3200
3272
""" the array interface, return my values """
3201
3273
return np.array(self._data, dtype=dtype)
3202
3274
3275
@cache_readonly
def _isnan(self):
    """
    Boolean mask flagging the missing entries: True wherever the
    underlying category code equals -1.
    """
    codes = self._data.codes
    return codes == -1
3279
+
3280
@Appender(_index_shared_docs['fillna'])
def fillna(self, value, downcast=None):
    # Docstring comes from the shared template via ``Appender``.
    # Only scalar fill values are accepted; raises TypeError otherwise.
    self._assert_can_do_op(value)
    # Filling is delegated to the underlying categorical data; ``downcast``
    # is accepted for signature compatibility but not used here.
    filled = self._data.fillna(value)
    return CategoricalIndex(filled, name=self.name)
3284
+
3203
3285
def argsort(self, *args, **kwargs):
3204
3286
return self.values.argsort(*args, **kwargs)
3205
3287
@@ -3214,7 +3296,7 @@ def is_unique(self):
3214
3296
return not self.duplicated().any()
3215
3297
3216
3298
@deprecate_kwarg('take_last', 'keep', mapping={True: 'last', False: 'first'})
3217
- @Appender(_shared_docs['duplicated'] % _index_doc_kwargs)
3299
+ @Appender(base. _shared_docs['duplicated'] % _index_doc_kwargs)
3218
3300
def duplicated(self, keep='first'):
3219
3301
from pandas.hashtable import duplicated_int64
3220
3302
return duplicated_int64(self.codes.astype('i8'), keep)
@@ -3612,6 +3694,8 @@ class Int64Index(NumericIndex):
3612
3694
_inner_indexer = _algos.inner_join_indexer_int64
3613
3695
_outer_indexer = _algos.outer_join_indexer_int64
3614
3696
3697
+ _can_hold_na = False
3698
+
3615
3699
_engine_type = _index.Int64Engine
3616
3700
3617
3701
def __new__(cls, data=None, dtype=None, copy=False, name=None, fastpath=False, **kwargs):
@@ -3646,11 +3730,6 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, fastpath=False, *
3646
3730
def inferred_type(self):
3647
3731
return 'integer'
3648
3732
3649
- @cache_readonly
3650
- def hasnans(self):
3651
- # by definition
3652
- return False
3653
-
3654
3733
@property
3655
3734
def asi8(self):
3656
3735
# do not cache or you'll create a memory leak
@@ -3872,19 +3951,6 @@ def is_all_dates(self):
3872
3951
"""
3873
3952
return False
3874
3953
3875
- @cache_readonly
3876
- def _nan_idxs(self):
3877
- w, = self._isnan.nonzero()
3878
- return w
3879
-
3880
- @cache_readonly
3881
- def _isnan(self):
3882
- return np.isnan(self.values)
3883
-
3884
- @cache_readonly
3885
- def hasnans(self):
3886
- return self._isnan.any()
3887
-
3888
3954
@cache_readonly
3889
3955
def is_unique(self):
3890
3956
return super(Float64Index, self).is_unique and self._nan_idxs.size < 2
@@ -4414,7 +4480,7 @@ def is_unique(self):
4414
4480
return not self.duplicated().any()
4415
4481
4416
4482
@deprecate_kwarg('take_last', 'keep', mapping={True: 'last', False: 'first'})
4417
- @Appender(_shared_docs['duplicated'] % _index_doc_kwargs)
4483
+ @Appender(base. _shared_docs['duplicated'] % _index_doc_kwargs)
4418
4484
def duplicated(self, keep='first'):
4419
4485
from pandas.core.groupby import get_group_index
4420
4486
from pandas.hashtable import duplicated_int64
@@ -4424,6 +4490,11 @@ def duplicated(self, keep='first'):
4424
4490
4425
4491
return duplicated_int64(ids, keep)
4426
4492
4493
@Appender(_index_shared_docs['fillna'])
def fillna(self, value=None, downcast=None):
    # Docstring comes from the shared template via ``Appender``.
    # isnull is not implemented for MultiIndex, so filling is rejected
    # unconditionally, whatever arguments are passed.
    message = 'isnull is not defined for MultiIndex'
    raise NotImplementedError(message)
4497
+
4427
4498
def get_value(self, series, key):
4428
4499
# somewhat broken encapsulation
4429
4500
from pandas.core.indexing import maybe_droplevels
0 commit comments