From dd273e88c14c8ad6a082d300faa74c9da214ac23 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 17 Nov 2023 12:08:13 -0800 Subject: [PATCH] REF: cython3 cleanups --- pandas/_libs/algos.pyx | 5 ++--- pandas/_libs/arrays.pyx | 3 +-- pandas/_libs/groupby.pyx | 16 ++++----------- pandas/_libs/internals.pyx | 6 +----- pandas/_libs/lib.pyx | 3 +-- pandas/_libs/parsers.pyx | 6 +----- pandas/_libs/tslibs/timestamps.pyx | 32 ++++++++++-------------------- pandas/_libs/tslibs/util.pxd | 12 +++++------ 8 files changed, 25 insertions(+), 58 deletions(-) diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx index b330b37aebd8d..dbe165b3ebec0 100644 --- a/pandas/_libs/algos.pyx +++ b/pandas/_libs/algos.pyx @@ -998,8 +998,7 @@ def rank_1d( N = len(values) if labels is not None: - # TODO(cython3): cast won't be necessary (#2992) - assert len(labels) == N + assert len(labels) == N out = np.empty(N) grp_sizes = np.ones(N, dtype=np.int64) @@ -1483,7 +1482,7 @@ def diff_2d( cdef: Py_ssize_t i, j, sx, sy, start, stop bint f_contig = arr.flags.f_contiguous - # bint f_contig = arr.is_f_contig() # TODO(cython3) + # bint f_contig = arr.is_f_contig() # TODO(cython3) once arr is memoryview diff_t left, right # Disable for unsupported dtype combinations, diff --git a/pandas/_libs/arrays.pyx b/pandas/_libs/arrays.pyx index 718fb358e26bc..9889436a542c1 100644 --- a/pandas/_libs/arrays.pyx +++ b/pandas/_libs/arrays.pyx @@ -126,8 +126,7 @@ cdef class NDArrayBacked: @property def size(self) -> int: - # TODO(cython3): use self._ndarray.size - return cnp.PyArray_SIZE(self._ndarray) + return self._ndarray.size @property def nbytes(self) -> int: diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx index a73e37b9cfe1f..19d71b0a6fde3 100644 --- a/pandas/_libs/groupby.pyx +++ b/pandas/_libs/groupby.pyx @@ -1436,9 +1436,7 @@ def group_last( bint uses_mask = mask is not None bint isna_entry - # TODO(cython3): - # Instead of `labels.shape[0]` use `len(labels)` - if not len(values) == labels.shape[0]: + if not len(values) == len(labels): raise AssertionError("len(index) != len(labels)") min_count = max(min_count, 1) @@ -1500,9 +1498,7 @@ def group_nth( bint uses_mask = mask is not None bint isna_entry - # TODO(cython3): - # Instead of `labels.shape[0]` use `len(labels)` - if not len(values) == labels.shape[0]: + if not len(values) == len(labels): raise AssertionError("len(index) != len(labels)") min_count = max(min_count, 1) @@ -1676,9 +1672,7 @@ cdef group_min_max( bint uses_mask = mask is not None bint isna_entry - # TODO(cython3): - # Instead of `labels.shape[0]` use `len(labels)` - if not len(values) == labels.shape[0]: + if not len(values) == len(labels): raise AssertionError("len(index) != len(labels)") min_count = max(min_count, 1) @@ -1779,9 +1773,7 @@ def group_idxmin_idxmax( assert name == "idxmin" or name == "idxmax" - # TODO(cython3): - # Instead of `labels.shape[0]` use `len(labels)` - if not len(values) == labels.shape[0]: + if not len(values) == len(labels): raise AssertionError("len(index) != len(labels)") N, K = (values).shape diff --git a/pandas/_libs/internals.pyx b/pandas/_libs/internals.pyx index fdfb8e1c99f6e..05c4e7bd5e9dc 100644 --- a/pandas/_libs/internals.pyx +++ b/pandas/_libs/internals.pyx @@ -2,14 +2,10 @@ from collections import defaultdict import weakref cimport cython +from cpython.pyport cimport PY_SSIZE_T_MAX from cpython.slice cimport PySlice_GetIndicesEx from cython cimport Py_ssize_t - -cdef extern from "Python.h": - # TODO(cython3): from cpython.pyport cimport PY_SSIZE_T_MAX - Py_ssize_t PY_SSIZE_T_MAX - import numpy as np cimport numpy as cnp diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 5b705e80f97f5..2f1b24d9dfc38 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -502,8 +502,7 @@ def get_reverse_indexer(const intp_t[:] indexer, Py_ssize_t length) -> ndarray: @cython.wraparound(False) @cython.boundscheck(False) -# TODO(cython3): Can add const once cython#1772 is resolved -def has_infs(floating[:] arr) -> bool: +def has_infs(const floating[:] arr) -> bool: cdef: Py_ssize_t i, n = len(arr) floating inf, neginf, val diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index 823c267cf92b1..ab28b34be58f2 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -34,6 +34,7 @@ from cpython.unicode cimport ( PyUnicode_AsUTF8String, PyUnicode_Decode, PyUnicode_DecodeUTF8, + PyUnicode_FromString, ) from cython cimport Py_ssize_t from libc.stdlib cimport free @@ -44,11 +45,6 @@ from libc.string cimport ( ) -cdef extern from "Python.h": - # TODO(cython3): get this from cpython.unicode - object PyUnicode_FromString(char *v) - - import numpy as np cimport numpy as cnp diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index c4693be8c11c6..568539b53aee0 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -504,17 +504,11 @@ cdef class _Timestamp(ABCTimestamp): return NotImplemented # coerce if necessary if we are a Timestamp-like - if (PyDateTime_Check(self) - and (PyDateTime_Check(other) or cnp.is_datetime64_object(other))): + if PyDateTime_Check(other) or cnp.is_datetime64_object(other): # both_timestamps is to determine whether Timedelta(self - other) # should raise the OOB error, or fall back returning a timedelta. - # TODO(cython3): clean out the bits that moved to __rsub__ - both_timestamps = (isinstance(other, _Timestamp) and - isinstance(self, _Timestamp)) - if isinstance(self, _Timestamp): - other = type(self)(other) - else: - self = type(other)(self) + both_timestamps = isinstance(other, _Timestamp) + other = type(self)(other) if (self.tzinfo is None) ^ (other.tzinfo is None): raise TypeError( @@ -531,24 +525,18 @@ cdef class _Timestamp(ABCTimestamp): # scalar Timestamp/datetime - Timestamp/datetime -> yields a # Timedelta try: - res_value = self._value- other._value + res_value = self._value - other._value return Timedelta._from_value_and_reso(res_value, self._creso) except (OverflowError, OutOfBoundsDatetime, OutOfBoundsTimedelta) as err: - if isinstance(other, _Timestamp): - if both_timestamps: - raise OutOfBoundsDatetime( - "Result is too large for pandas.Timedelta. Convert inputs " - "to datetime.datetime with 'Timestamp.to_pydatetime()' " - "before subtracting." - ) from err + if both_timestamps: + raise OutOfBoundsDatetime( + "Result is too large for pandas.Timedelta. Convert inputs " + "to datetime.datetime with 'Timestamp.to_pydatetime()' " + "before subtracting." + ) from err # We get here in stata tests, fall back to stdlib datetime # method and return stdlib timedelta object pass - elif cnp.is_datetime64_object(self): - # GH#28286 cython semantics for __rsub__, `other` is actually - # the Timestamp - # TODO(cython3): remove this, this moved to __rsub__ - return type(other)(self) - other return NotImplemented diff --git a/pandas/_libs/tslibs/util.pxd b/pandas/_libs/tslibs/util.pxd index 2c39a2b4699b2..e4ac3a9e167a3 100644 --- a/pandas/_libs/tslibs/util.pxd +++ b/pandas/_libs/tslibs/util.pxd @@ -1,5 +1,6 @@ from cpython.object cimport PyTypeObject +from cpython.unicode cimport PyUnicode_AsUTF8AndSize cdef extern from "Python.h": @@ -10,14 +11,8 @@ cdef extern from "Python.h": bint PyComplex_Check(object obj) nogil bint PyObject_TypeCheck(object obj, PyTypeObject* type) nogil - # TODO(cython3): cimport this, xref GH#49670 # Note that following functions can potentially raise an exception, - # thus they cannot be declared 'nogil'. Also PyUnicode_AsUTF8AndSize() can - # potentially allocate memory inside in unlikely case of when underlying - # unicode object was stored as non-utf8 and utf8 wasn't requested before. - const char* PyUnicode_AsUTF8AndSize(object obj, - Py_ssize_t* length) except NULL - + # thus they cannot be declared 'nogil'. object PyUnicode_EncodeLocale(object obj, const char *errors) nogil object PyUnicode_DecodeLocale(const char *str, const char *errors) nogil @@ -180,6 +175,9 @@ cdef inline const char* get_c_string_buf_and_size(str py_string, ------- buf : const char* """ + # Note PyUnicode_AsUTF8AndSize() can + # potentially allocate memory inside in unlikely case of when underlying + # unicode object was stored as non-utf8 and utf8 wasn't requested before. return PyUnicode_AsUTF8AndSize(py_string, length)