From dd273e88c14c8ad6a082d300faa74c9da214ac23 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Fri, 17 Nov 2023 12:08:13 -0800
Subject: [PATCH] REF: cython3 cleanups

---
 pandas/_libs/algos.pyx             |  5 ++---
 pandas/_libs/arrays.pyx            |  3 +--
 pandas/_libs/groupby.pyx           | 16 ++++-----------
 pandas/_libs/internals.pyx         |  6 +-----
 pandas/_libs/lib.pyx               |  3 +--
 pandas/_libs/parsers.pyx           |  6 +-----
 pandas/_libs/tslibs/timestamps.pyx | 32 ++++++++++--------------------
 pandas/_libs/tslibs/util.pxd       | 12 +++++------
 8 files changed, 25 insertions(+), 58 deletions(-)
diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx
index b330b37aebd8d..dbe165b3ebec0 100644
--- a/pandas/_libs/algos.pyx
+++ b/pandas/_libs/algos.pyx
@@ -998,8 +998,7 @@ def rank_1d(
 
     N = len(values)
     if labels is not None:
-        # TODO(cython3): cast won't be necessary (#2992)
-        assert <Py_ssize_t>len(labels) == N
+        assert len(labels) == N
     out = np.empty(N)
     grp_sizes = np.ones(N, dtype=np.int64)
 
@@ -1483,7 +1482,7 @@ def diff_2d(
     cdef:
         Py_ssize_t i, j, sx, sy, start, stop
         bint f_contig = arr.flags.f_contiguous
-        # bint f_contig = arr.is_f_contig()  # TODO(cython3)
+        # bint f_contig = arr.is_f_contig()  # TODO(cython3): once arr is a memoryview
         diff_t left, right
 
     # Disable for unsupported dtype combinations,
diff --git a/pandas/_libs/arrays.pyx b/pandas/_libs/arrays.pyx
index 718fb358e26bc..9889436a542c1 100644
--- a/pandas/_libs/arrays.pyx
+++ b/pandas/_libs/arrays.pyx
@@ -126,8 +126,7 @@ cdef class NDArrayBacked:
 
     @property
     def size(self) -> int:
-        # TODO(cython3): use self._ndarray.size
-        return cnp.PyArray_SIZE(self._ndarray)
+        return self._ndarray.size
 
     @property
     def nbytes(self) -> int:
diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx
index a73e37b9cfe1f..19d71b0a6fde3 100644
--- a/pandas/_libs/groupby.pyx
+++ b/pandas/_libs/groupby.pyx
@@ -1436,9 +1436,7 @@ def group_last(
         bint uses_mask = mask is not None
         bint isna_entry
 
-    # TODO(cython3):
-    # Instead of `labels.shape[0]` use `len(labels)`
-    if not len(values) == labels.shape[0]:
+    if not len(values) == len(labels):
         raise AssertionError("len(index) != len(labels)")
 
     min_count = max(min_count, 1)
@@ -1500,9 +1498,7 @@ def group_nth(
         bint uses_mask = mask is not None
         bint isna_entry
 
-    # TODO(cython3):
-    # Instead of `labels.shape[0]` use `len(labels)`
-    if not len(values) == labels.shape[0]:
+    if not len(values) == len(labels):
         raise AssertionError("len(index) != len(labels)")
 
     min_count = max(min_count, 1)
@@ -1676,9 +1672,7 @@ cdef group_min_max(
         bint uses_mask = mask is not None
         bint isna_entry
 
-    # TODO(cython3):
-    # Instead of `labels.shape[0]` use `len(labels)`
-    if not len(values) == labels.shape[0]:
+    if not len(values) == len(labels):
         raise AssertionError("len(index) != len(labels)")
 
     min_count = max(min_count, 1)
@@ -1779,9 +1773,7 @@ def group_idxmin_idxmax(
 
     assert name == "idxmin" or name == "idxmax"
 
-    # TODO(cython3):
-    # Instead of `labels.shape[0]` use `len(labels)`
-    if not len(values) == labels.shape[0]:
+    if not len(values) == len(labels):
         raise AssertionError("len(index) != len(labels)")
 
     N, K = (<object>values).shape
diff --git a/pandas/_libs/internals.pyx b/pandas/_libs/internals.pyx
index fdfb8e1c99f6e..05c4e7bd5e9dc 100644
--- a/pandas/_libs/internals.pyx
+++ b/pandas/_libs/internals.pyx
@@ -2,14 +2,10 @@ from collections import defaultdict
 import weakref
 
 cimport cython
+from cpython.pyport cimport PY_SSIZE_T_MAX
 from cpython.slice cimport PySlice_GetIndicesEx
 from cython cimport Py_ssize_t
 
-
-cdef extern from "Python.h":
-    # TODO(cython3): from cpython.pyport cimport PY_SSIZE_T_MAX
-    Py_ssize_t PY_SSIZE_T_MAX
-
 import numpy as np
 
 cimport numpy as cnp
diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index 5b705e80f97f5..2f1b24d9dfc38 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -502,8 +502,7 @@ def get_reverse_indexer(const intp_t[:] indexer, Py_ssize_t length) -> ndarray:
 
 @cython.wraparound(False)
 @cython.boundscheck(False)
-# TODO(cython3): Can add const once cython#1772 is resolved
-def has_infs(floating[:] arr) -> bool:
+def has_infs(const floating[:] arr) -> bool:
     cdef:
         Py_ssize_t i, n = len(arr)
         floating inf, neginf, val
diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx
index 823c267cf92b1..ab28b34be58f2 100644
--- a/pandas/_libs/parsers.pyx
+++ b/pandas/_libs/parsers.pyx
@@ -34,6 +34,7 @@ from cpython.unicode cimport (
     PyUnicode_AsUTF8String,
     PyUnicode_Decode,
     PyUnicode_DecodeUTF8,
+    PyUnicode_FromString,
 )
 from cython cimport Py_ssize_t
 from libc.stdlib cimport free
@@ -44,11 +45,6 @@ from libc.string cimport (
 )
 
 
-cdef extern from "Python.h":
-    # TODO(cython3): get this from cpython.unicode
-    object PyUnicode_FromString(char *v)
-
-
 import numpy as np
 
 cimport numpy as cnp
diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx
index c4693be8c11c6..568539b53aee0 100644
--- a/pandas/_libs/tslibs/timestamps.pyx
+++ b/pandas/_libs/tslibs/timestamps.pyx
@@ -504,17 +504,11 @@ cdef class _Timestamp(ABCTimestamp):
             return NotImplemented
 
         # coerce if necessary if we are a Timestamp-like
-        if (PyDateTime_Check(self)
-                and (PyDateTime_Check(other) or cnp.is_datetime64_object(other))):
+        if PyDateTime_Check(other) or cnp.is_datetime64_object(other):
             # both_timestamps is to determine whether Timedelta(self - other)
             # should raise the OOB error, or fall back returning a timedelta.
-            # TODO(cython3): clean out the bits that moved to __rsub__
-            both_timestamps = (isinstance(other, _Timestamp) and
-                               isinstance(self, _Timestamp))
-            if isinstance(self, _Timestamp):
-                other = type(self)(other)
-            else:
-                self = type(other)(self)
+            both_timestamps = isinstance(other, _Timestamp)
+            other = type(self)(other)
 
             if (self.tzinfo is None) ^ (other.tzinfo is None):
                 raise TypeError(
@@ -531,24 +525,18 @@ cdef class _Timestamp(ABCTimestamp):
             # scalar Timestamp/datetime - Timestamp/datetime -> yields a
             # Timedelta
             try:
-                res_value = self._value- other._value
+                res_value = self._value - other._value
                 return Timedelta._from_value_and_reso(res_value, self._creso)
             except (OverflowError, OutOfBoundsDatetime, OutOfBoundsTimedelta) as err:
-                if isinstance(other, _Timestamp):
-                    if both_timestamps:
-                        raise OutOfBoundsDatetime(
-                            "Result is too large for pandas.Timedelta. Convert inputs "
-                            "to datetime.datetime with 'Timestamp.to_pydatetime()' "
-                            "before subtracting."
-                        ) from err
+                if both_timestamps:
+                    raise OutOfBoundsDatetime(
+                        "Result is too large for pandas.Timedelta. Convert inputs "
+                        "to datetime.datetime with 'Timestamp.to_pydatetime()' "
+                        "before subtracting."
+                    ) from err
                 # We get here in stata tests, fall back to stdlib datetime
                 #  method and return stdlib timedelta object
                 pass
-        elif cnp.is_datetime64_object(self):
-            # GH#28286 cython semantics for __rsub__, `other` is actually
-            #  the Timestamp
-            # TODO(cython3): remove this, this moved to __rsub__
-            return type(other)(self) - other
 
         return NotImplemented
 
diff --git a/pandas/_libs/tslibs/util.pxd b/pandas/_libs/tslibs/util.pxd
index 2c39a2b4699b2..e4ac3a9e167a3 100644
--- a/pandas/_libs/tslibs/util.pxd
+++ b/pandas/_libs/tslibs/util.pxd
@@ -1,5 +1,6 @@
 
 from cpython.object cimport PyTypeObject
+from cpython.unicode cimport PyUnicode_AsUTF8AndSize
 
 
 cdef extern from "Python.h":
@@ -10,14 +11,8 @@ cdef extern from "Python.h":
     bint PyComplex_Check(object obj) nogil
     bint PyObject_TypeCheck(object obj, PyTypeObject* type) nogil
 
-    # TODO(cython3): cimport this, xref GH#49670
     # Note that following functions can potentially raise an exception,
-    # thus they cannot be declared 'nogil'. Also PyUnicode_AsUTF8AndSize() can
-    # potentially allocate memory inside in unlikely case of when underlying
-    # unicode object was stored as non-utf8 and utf8 wasn't requested before.
-    const char* PyUnicode_AsUTF8AndSize(object obj,
-                                        Py_ssize_t* length) except NULL
-
+    # thus they cannot be declared 'nogil'.
     object PyUnicode_EncodeLocale(object obj, const char *errors) nogil
     object PyUnicode_DecodeLocale(const char *str, const char *errors) nogil
 
@@ -180,6 +175,9 @@ cdef inline const char* get_c_string_buf_and_size(str py_string,
     -------
     buf : const char*
     """
+    # Note: PyUnicode_AsUTF8AndSize() can allocate memory internally in the
+    #  unlikely case that the underlying unicode object was not stored as
+    #  UTF-8 and its UTF-8 representation has not been requested before.
     return PyUnicode_AsUTF8AndSize(py_string, length)