From 23c85b14c2d8a36d30e82b41424f04f5e71e7877 Mon Sep 17 00:00:00 2001
From: Jeffrey Harrison <jeffrharrison@gmail.com>
Date: Fri, 7 Mar 2025 17:04:52 -0700
Subject: [PATCH 1/7] Save original index and remap after function completes.

---
 pandas/core/methods/selectn.py | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/pandas/core/methods/selectn.py b/pandas/core/methods/selectn.py
index 02e7445f1d275..3e10f8bad31f3 100644
--- a/pandas/core/methods/selectn.py
+++ b/pandas/core/methods/selectn.py
@@ -111,13 +111,20 @@ def compute(self, method: str) -> Series:
         if n <= 0:
             return self.obj[[]]
 
-        dropped = self.obj.dropna()
-        nan_index = self.obj.drop(dropped.index)
+        # Save index and reset to default index to avoid performance impact 
+        # from when index contains duplicates
+        original_index = self.obj.index
+        cur_series = self.obj.reset_index(drop=True)
+
+        dropped = cur_series.dropna()
+        nan_index = cur_series.drop(dropped.index)
 
         # slow method
-        if n >= len(self.obj):
+        if n >= len(cur_series):
             ascending = method == "nsmallest"
-            return self.obj.sort_values(ascending=ascending).head(n)
+            final_series =  cur_series.sort_values(ascending=ascending, kind="mergesort").head(n)
+            final_series.index = original_index.take(final_series.index)
+            return final_series
 
         # fast method
         new_dtype = dropped.dtype
@@ -173,7 +180,9 @@ def compute(self, method: str) -> Series:
             # reverse indices
             inds = narr - 1 - inds
 
-        return concat([dropped.iloc[inds], nan_index]).iloc[:findex]
+        final_series = concat([dropped.iloc[inds], nan_index]).iloc[:findex]
+        final_series.index = original_index.take(final_series.index)
+        return final_series
 
 
 class SelectNFrame(SelectN[DataFrame]):

From 4c39ea37812e8931b01454055095102db6553fb3 Mon Sep 17 00:00:00 2001
From: Jeffrey Harrison <jeffrharrison@gmail.com>
Date: Thu, 13 Mar 2025 14:33:04 -0600
Subject: [PATCH 2/7] precommit passes

---
 pandas/core/methods/selectn.py | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/pandas/core/methods/selectn.py b/pandas/core/methods/selectn.py
index 3e10f8bad31f3..e7fbb29d4fc49 100644
--- a/pandas/core/methods/selectn.py
+++ b/pandas/core/methods/selectn.py
@@ -11,6 +11,7 @@
 from typing import (
     TYPE_CHECKING,
     Generic,
+    Literal,
     cast,
     final,
 )
@@ -54,7 +55,9 @@
 
 
 class SelectN(Generic[NDFrameT]):
-    def __init__(self, obj: NDFrameT, n: int, keep: str) -> None:
+    def __init__(
+        self, obj: NDFrameT, n: int, keep: Literal["first", "last", "all"]
+    ) -> None:
         self.obj = obj
         self.n = n
         self.keep = keep
@@ -111,9 +114,9 @@ def compute(self, method: str) -> Series:
         if n <= 0:
             return self.obj[[]]
 
-        # Save index and reset to default index to avoid performance impact 
+        # Save index and reset to default index to avoid performance impact
         # from when index contains duplicates
-        original_index = self.obj.index
+        original_index: Index = self.obj.index
         cur_series = self.obj.reset_index(drop=True)
 
         dropped = cur_series.dropna()
@@ -122,7 +125,9 @@ def compute(self, method: str) -> Series:
         # slow method
         if n >= len(cur_series):
             ascending = method == "nsmallest"
-            final_series =  cur_series.sort_values(ascending=ascending, kind="mergesort").head(n)
+            final_series = cur_series.sort_values(
+                ascending=ascending, kind="mergesort"
+            ).head(n)
             final_series.index = original_index.take(final_series.index)
             return final_series
 

From 0c0c3b5bd4f305db6b37479604dad31c81de180b Mon Sep 17 00:00:00 2001
From: Jeffrey Harrison <jeffrharrison@gmail.com>
Date: Thu, 13 Mar 2025 15:29:23 -0600
Subject: [PATCH 3/7] use stable sorting 'mergesort' in tests

---
 pandas/tests/frame/methods/test_nlargest.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/tests/frame/methods/test_nlargest.py b/pandas/tests/frame/methods/test_nlargest.py
index c6e5304ae3cb4..647ef87671198 100644
--- a/pandas/tests/frame/methods/test_nlargest.py
+++ b/pandas/tests/frame/methods/test_nlargest.py
@@ -153,11 +153,11 @@ def test_nlargest_n_duplicate_index(self, n, order, request):
             index=[0, 0, 1, 1, 1],
         )
         result = df.nsmallest(n, order)
-        expected = df.sort_values(order).head(n)
+        expected = df.sort_values(order, kind="mergesort").head(n)
         tm.assert_frame_equal(result, expected)
 
         result = df.nlargest(n, order)
-        expected = df.sort_values(order, ascending=False).head(n)
+        expected = df.sort_values(order, ascending=False, kind="mergesort").head(n)
         if Version(np.__version__) >= Version("1.25") and (
             (order == ["a"] and n in (1, 2, 3, 4)) or ((order == ["a", "b"]) and n == 5)
         ):

From cb3586fdb3a4a2eac324e2a4ec8f6c3b2c3bdc82 Mon Sep 17 00:00:00 2001
From: Jeffrey Harrison <jeffrharrison@gmail.com>
Date: Thu, 13 Mar 2025 16:03:29 -0600
Subject: [PATCH 4/7] Change sorts to `stable` instead of mergesort

---
 doc/source/whatsnew/v3.0.0.rst              |  3 +++
 pandas/core/methods/selectn.py              | 10 +++++-----
 pandas/tests/frame/methods/test_nlargest.py |  4 ++--
 3 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index 3294af742fae0..05b52f7ec950b 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -72,6 +72,7 @@ Other enhancements
 - :meth:`DataFrameGroupBy.transform`, :meth:`SeriesGroupBy.transform`, :meth:`DataFrameGroupBy.agg`, :meth:`SeriesGroupBy.agg`, :meth:`RollingGroupby.apply`, :meth:`ExpandingGroupby.apply`, :meth:`Rolling.apply`, :meth:`Expanding.apply`, :meth:`DataFrame.apply` with ``engine="numba"`` now supports positional arguments passed as kwargs (:issue:`58995`)
 - :meth:`Rolling.agg`, :meth:`Expanding.agg` and :meth:`ExponentialMovingWindow.agg` now accept :class:`NamedAgg` aggregations through ``**kwargs`` (:issue:`28333`)
 - :meth:`Series.map` can now accept kwargs to pass on to func (:issue:`59814`)
+- :meth:`Series.nlargest` has improved performance when there are duplicate values in the index (:issue:`55767`)
 - :meth:`Series.str.get_dummies` now accepts a  ``dtype`` parameter to specify the dtype of the resulting DataFrame (:issue:`47872`)
 - :meth:`pandas.concat` will raise a ``ValueError`` when ``ignore_index=True`` and ``keys`` is not ``None`` (:issue:`59274`)
 - :py:class:`frozenset` elements in pandas objects are now natively printed (:issue:`60690`)
@@ -152,6 +153,8 @@ These improvements also fixed certain bugs in groupby:
 - :meth:`.DataFrameGroupBy.sum` would have incorrect values when there are multiple groupings, unobserved groups, and non-numeric data (:issue:`43891`)
 - :meth:`.DataFrameGroupBy.value_counts` would produce incorrect results when used with some categorical and some non-categorical groupings and ``observed=False`` (:issue:`56016`)
 
+- :meth:`Series.nlargest`
+
 .. _whatsnew_300.notable_bug_fixes.notable_bug_fix2:
 
 notable_bug_fix2
diff --git a/pandas/core/methods/selectn.py b/pandas/core/methods/selectn.py
index e7fbb29d4fc49..544c5ce4df8ab 100644
--- a/pandas/core/methods/selectn.py
+++ b/pandas/core/methods/selectn.py
@@ -119,18 +119,18 @@ def compute(self, method: str) -> Series:
         original_index: Index = self.obj.index
         cur_series = self.obj.reset_index(drop=True)
 
-        dropped = cur_series.dropna()
-        nan_index = cur_series.drop(dropped.index)
-
         # slow method
         if n >= len(cur_series):
             ascending = method == "nsmallest"
             final_series = cur_series.sort_values(
-                ascending=ascending, kind="mergesort"
+                ascending=ascending, kind="stable"
             ).head(n)
             final_series.index = original_index.take(final_series.index)
             return final_series
 
+        dropped = cur_series.dropna()
+        nan_index = cur_series.drop(dropped.index)
+
         # fast method
         new_dtype = dropped.dtype
 
@@ -291,4 +291,4 @@ def get_indexer(current_indexer: Index, other_indexer: Index) -> Index:
 
         ascending = method == "nsmallest"
 
-        return frame.sort_values(columns, ascending=ascending, kind="mergesort")
+        return frame.sort_values(columns, ascending=ascending, kind="stable")
diff --git a/pandas/tests/frame/methods/test_nlargest.py b/pandas/tests/frame/methods/test_nlargest.py
index 647ef87671198..08b7128e6ec11 100644
--- a/pandas/tests/frame/methods/test_nlargest.py
+++ b/pandas/tests/frame/methods/test_nlargest.py
@@ -153,11 +153,11 @@ def test_nlargest_n_duplicate_index(self, n, order, request):
             index=[0, 0, 1, 1, 1],
         )
         result = df.nsmallest(n, order)
-        expected = df.sort_values(order, kind="mergesort").head(n)
+        expected = df.sort_values(order, kind="stable").head(n)
         tm.assert_frame_equal(result, expected)
 
         result = df.nlargest(n, order)
-        expected = df.sort_values(order, ascending=False, kind="mergesort").head(n)
+        expected = df.sort_values(order, ascending=False, kind="stable").head(n)
         if Version(np.__version__) >= Version("1.25") and (
             (order == ["a"] and n in (1, 2, 3, 4)) or ((order == ["a", "b"]) and n == 5)
         ):

From ba2470e3e764cb18ad0f9d6543eead9f933458cd Mon Sep 17 00:00:00 2001
From: Jeffrey Harrison <jeffrharrison@gmail.com>
Date: Fri, 14 Mar 2025 09:40:13 -0600
Subject: [PATCH 5/7] modify 'keep' to use a Literal instead of string

---
 pandas/core/methods/selectn.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/pandas/core/methods/selectn.py b/pandas/core/methods/selectn.py
index 544c5ce4df8ab..b3bff2c5186b5 100644
--- a/pandas/core/methods/selectn.py
+++ b/pandas/core/methods/selectn.py
@@ -206,7 +206,13 @@ class SelectNFrame(SelectN[DataFrame]):
     nordered : DataFrame
     """
 
-    def __init__(self, obj: DataFrame, n: int, keep: str, columns: IndexLabel) -> None:
+    def __init__(
+        self,
+        obj: DataFrame,
+        n: int,
+        keep: Literal["first", "last", "all"],
+        columns: IndexLabel,
+    ) -> None:
         super().__init__(obj, n, keep)
         if not is_list_like(columns) or isinstance(columns, tuple):
             columns = [columns]

From 1fb6cc3b2c48ba1c5392d68848566a36406c0f51 Mon Sep 17 00:00:00 2001
From: Jeffrey Harrison <jeffrharrison@gmail.com>
Date: Wed, 26 Mar 2025 12:32:57 -0600
Subject: [PATCH 6/7] address comments

---
 pandas/core/methods/selectn.py | 31 ++++++++++++++++---------------
 1 file changed, 16 insertions(+), 15 deletions(-)

diff --git a/pandas/core/methods/selectn.py b/pandas/core/methods/selectn.py
index b3bff2c5186b5..59516b16905dc 100644
--- a/pandas/core/methods/selectn.py
+++ b/pandas/core/methods/selectn.py
@@ -117,21 +117,22 @@ def compute(self, method: str) -> Series:
         # Save index and reset to default index to avoid performance impact
         # from when index contains duplicates
         original_index: Index = self.obj.index
-        cur_series = self.obj.reset_index(drop=True)
+        default_index = self.obj.reset_index(drop=True)
 
-        # slow method
-        if n >= len(cur_series):
+        # Slower method used when taking the full length of the series
+        # In this case, it is equivalent to a sort.
+        if n >= len(default_index):
             ascending = method == "nsmallest"
-            final_series = cur_series.sort_values(
-                ascending=ascending, kind="stable"
-            ).head(n)
-            final_series.index = original_index.take(final_series.index)
-            return final_series
+            result = default_index.sort_values(ascending=ascending, kind="stable").head(
+                n
+            )
+            result.index = original_index.take(result.index)
+            return result
 
-        dropped = cur_series.dropna()
-        nan_index = cur_series.drop(dropped.index)
+        # Fast method used in the general case
+        dropped = default_index.dropna()
+        nan_index = default_index.drop(dropped.index)
 
-        # fast method
         new_dtype = dropped.dtype
 
         # Similar to algorithms._ensure_data
@@ -170,7 +171,7 @@ def compute(self, method: str) -> Series:
         else:
             kth_val = np.nan
         (ns,) = np.nonzero(arr <= kth_val)
-        inds = ns[arr[ns].argsort(kind="mergesort")]
+        inds = ns[arr[ns].argsort(kind="stable")]
 
         if self.keep != "all":
             inds = inds[:n]
@@ -185,9 +186,9 @@ def compute(self, method: str) -> Series:
             # reverse indices
             inds = narr - 1 - inds
 
-        final_series = concat([dropped.iloc[inds], nan_index]).iloc[:findex]
-        final_series.index = original_index.take(final_series.index)
-        return final_series
+        result = concat([dropped.iloc[inds], nan_index]).iloc[:findex]
+        result.index = original_index.take(result.index)
+        return result
 
 
 class SelectNFrame(SelectN[DataFrame]):

From 8aa2a2c12346aa967c9731129404e29b613457a9 Mon Sep 17 00:00:00 2001
From: Jeffrey Harrison <jeffrharrison@gmail.com>
Date: Wed, 26 Mar 2025 12:50:30 -0600
Subject: [PATCH 7/7] update doc to include stable sort change

---
 doc/source/whatsnew/v3.0.0.rst | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index 05b52f7ec950b..2d74be6f503a2 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -61,6 +61,7 @@ Other enhancements
 - :meth:`Series.cummin` and :meth:`Series.cummax` now supports :class:`CategoricalDtype` (:issue:`52335`)
 - :meth:`Series.plot` now correctly handle the ``ylabel`` parameter for pie charts, allowing for explicit control over the y-axis label (:issue:`58239`)
 - :meth:`DataFrame.plot.scatter` argument ``c`` now accepts a column of strings, where rows with the same string are colored identically (:issue:`16827` and :issue:`16485`)
+- :meth:`Series.nlargest` uses a 'stable' sort internally and will preserve original ordering.
 - :class:`ArrowDtype` now supports ``pyarrow.JsonType`` (:issue:`60958`)
 - :class:`DataFrameGroupBy` and :class:`SeriesGroupBy` methods ``sum``, ``mean``, ``median``, ``prod``, ``min``, ``max``, ``std``, ``var`` and ``sem`` now accept ``skipna`` parameter (:issue:`15675`)
 - :class:`Rolling` and :class:`Expanding` now support ``nunique`` (:issue:`26958`)
@@ -72,7 +73,6 @@ Other enhancements
 - :meth:`DataFrameGroupBy.transform`, :meth:`SeriesGroupBy.transform`, :meth:`DataFrameGroupBy.agg`, :meth:`SeriesGroupBy.agg`, :meth:`RollingGroupby.apply`, :meth:`ExpandingGroupby.apply`, :meth:`Rolling.apply`, :meth:`Expanding.apply`, :meth:`DataFrame.apply` with ``engine="numba"`` now supports positional arguments passed as kwargs (:issue:`58995`)
 - :meth:`Rolling.agg`, :meth:`Expanding.agg` and :meth:`ExponentialMovingWindow.agg` now accept :class:`NamedAgg` aggregations through ``**kwargs`` (:issue:`28333`)
 - :meth:`Series.map` can now accept kwargs to pass on to func (:issue:`59814`)
-- :meth:`Series.nlargest` has improved performance when there are duplicate values in the index (:issue:`55767`)
 - :meth:`Series.str.get_dummies` now accepts a  ``dtype`` parameter to specify the dtype of the resulting DataFrame (:issue:`47872`)
 - :meth:`pandas.concat` will raise a ``ValueError`` when ``ignore_index=True`` and ``keys`` is not ``None`` (:issue:`59274`)
 - :py:class:`frozenset` elements in pandas objects are now natively printed (:issue:`60690`)
@@ -153,8 +153,6 @@ These improvements also fixed certain bugs in groupby:
 - :meth:`.DataFrameGroupBy.sum` would have incorrect values when there are multiple groupings, unobserved groups, and non-numeric data (:issue:`43891`)
 - :meth:`.DataFrameGroupBy.value_counts` would produce incorrect results when used with some categorical and some non-categorical groupings and ``observed=False`` (:issue:`56016`)
 
-- :meth:`Series.nlargest`
-
 .. _whatsnew_300.notable_bug_fixes.notable_bug_fix2:
 
 notable_bug_fix2
@@ -596,6 +594,7 @@ Performance improvements
 - :func:`concat` returns a :class:`RangeIndex` column when possible when ``objs`` contains :class:`Series` and :class:`DataFrame` and ``axis=0`` (:issue:`58119`)
 - :func:`concat` returns a :class:`RangeIndex` level in the :class:`MultiIndex` result when ``keys`` is a ``range`` or :class:`RangeIndex` (:issue:`57542`)
 - :meth:`RangeIndex.append` returns a :class:`RangeIndex` instead of a :class:`Index` when appending values that could continue the :class:`RangeIndex` (:issue:`57467`)
+- :meth:`Series.nlargest` has improved performance when there are duplicate values in the index (:issue:`55767`)
 - :meth:`Series.str.extract` returns a :class:`RangeIndex` columns instead of an :class:`Index` column when possible (:issue:`57542`)
 - :meth:`Series.str.partition` with :class:`ArrowDtype` returns a :class:`RangeIndex` columns instead of an :class:`Index` column when possible (:issue:`57768`)
 - Performance improvement in :class:`DataFrame` when ``data`` is a ``dict`` and ``columns`` is specified (:issue:`24368`)