22SparseArray data structure
33"""
44from __future__ import division
5- # pylint: disable=E1101,E1103,W0231
65
7- import re
8- import operator
96import numbers
10- import numpy as np
7+ import operator
8+ import re
119import warnings
1210
13- import pandas as pd
14- from pandas .core .base import PandasObject
11+ import numpy as np
1512
13+ import pandas ._libs .sparse as splib
14+ import pandas .core .algorithms as algos
15+ import pandas .core .common as com
16+ import pandas .io .formats .printing as printing
1617from pandas import compat
17- from pandas .errors import PerformanceWarning
18+ from pandas ._libs import index as libindex , lib
19+ from pandas ._libs .sparse import BlockIndex , IntIndex
20+ from pandas ._libs .tslibs import NaT
1821from pandas .compat .numpy import function as nv
19-
2022from pandas .core .accessor import PandasDelegate , delegate_names
2123from pandas .core .arrays import ExtensionArray , ExtensionOpsMixin
22- import pandas .core .common as com
24+ from pandas .core .base import PandasObject
2325from pandas .core .dtypes .base import ExtensionDtype
26+ from pandas .core .dtypes .cast import (
27+ astype_nansafe , construct_1d_arraylike_from_scalar , find_common_type ,
28+ infer_dtype_from_scalar , maybe_convert_platform
29+ )
30+ from pandas .core .dtypes .common import (
31+ is_array_like , is_bool_dtype , is_datetime64_any_dtype , is_dtype_equal ,
32+ is_integer , is_list_like , is_object_dtype , is_scalar , is_string_dtype ,
33+ pandas_dtype
34+ )
2435from pandas .core .dtypes .dtypes import register_extension_dtype
2536from pandas .core .dtypes .generic import (
26- ABCSparseSeries , ABCSeries , ABCIndexClass
37+ ABCIndexClass , ABCSeries , ABCSparseSeries
2738)
28- from pandas .core .dtypes .common import (
29- is_datetime64_any_dtype ,
30- is_integer ,
31- is_object_dtype ,
32- is_array_like ,
33- pandas_dtype ,
34- is_bool_dtype ,
35- is_list_like ,
36- is_string_dtype ,
37- is_scalar , is_dtype_equal )
38- from pandas .core .dtypes .cast import (
39- maybe_convert_platform ,
40- astype_nansafe , find_common_type , infer_dtype_from_scalar ,
41- construct_1d_arraylike_from_scalar )
42- from pandas .core .dtypes .missing import isna , notna , na_value_for_dtype
39+ from pandas .core .dtypes .missing import isna , na_value_for_dtype , notna
4340from pandas .core .missing import interpolate_2d
44-
45- import pandas ._libs .sparse as splib
46- from pandas ._libs .sparse import BlockIndex , IntIndex
47- from pandas ._libs import index as libindex
48- from pandas ._libs import lib
49- import pandas .core .algorithms as algos
50- import pandas .io .formats .printing as printing
41+ from pandas .errors import PerformanceWarning
5142
5243
5344# ----------------------------------------------------------------------------
5445# Dtype
55-
5646@register_extension_dtype
5747class SparseDtype (ExtensionDtype ):
5848 """
@@ -620,7 +610,7 @@ def __array__(self, dtype=None, copy=True):
620610 if is_datetime64_any_dtype (self .sp_values .dtype ):
621611 # However, we *do* special-case the common case of
622612 # a datetime64 with pandas NaT.
623- if fill_value is pd . NaT :
613+ if fill_value is NaT :
624614 # Can't put pd.NaT in a datetime64[ns]
625615 fill_value = np .datetime64 ('NaT' )
626616 try :
@@ -710,7 +700,7 @@ def _null_fill_value(self):
710700
711701 def _fill_value_matches (self , fill_value ):
712702 if self ._null_fill_value :
713- return pd . isna (fill_value )
703+ return isna (fill_value )
714704 else :
715705 return self .fill_value == fill_value
716706
@@ -855,7 +845,7 @@ def _first_fill_value_loc(self):
855845 return np .searchsorted (diff , 2 ) + 1
856846
857847 def unique (self ):
858- uniques = list (pd .unique (self .sp_values ))
848+ uniques = list (algos .unique (self .sp_values ))
859849 fill_loc = self ._first_fill_value_loc ()
860850 if fill_loc >= 0 :
861851 uniques .insert (fill_loc , self .fill_value )
@@ -871,8 +861,8 @@ def factorize(self, na_sentinel=-1):
871861 # ExtensionArray.factorize -> Tuple[EA, EA]
872862 # Given that we have to return a dense array of labels, why bother
873863 # implementing an efficient factorize?
874- labels , uniques = pd .factorize (np .asarray (self ),
875- na_sentinel = na_sentinel )
864+ labels , uniques = algos .factorize (np .asarray (self ),
865+ na_sentinel = na_sentinel )
876866 uniques = SparseArray (uniques , dtype = self .dtype )
877867 return labels , uniques
878868
@@ -889,6 +879,8 @@ def value_counts(self, dropna=True):
889879 -------
890880 counts : Series
891881 """
882+ from pandas import Index , Series
883+
892884 keys , counts = algos ._value_counts_arraylike (self .sp_values ,
893885 dropna = dropna )
894886 fcounts = self .sp_index .ngaps
@@ -897,7 +889,7 @@ def value_counts(self, dropna=True):
897889 pass
898890 else :
899891 if self ._null_fill_value :
900- mask = pd . isna (keys )
892+ mask = isna (keys )
901893 else :
902894 mask = keys == self .fill_value
903895
@@ -907,9 +899,9 @@ def value_counts(self, dropna=True):
907899 keys = np .insert (keys , 0 , self .fill_value )
908900 counts = np .insert (counts , 0 , fcounts )
909901
910- if not isinstance (keys , pd . Index ):
911- keys = pd . Index (keys )
912- result = pd . Series (counts , index = keys )
902+ if not isinstance (keys , ABCIndexClass ):
903+ keys = Index (keys )
904+ result = Series (counts , index = keys )
913905 return result
914906
915907 # --------
0 commit comments