From fff8a6b28dfb8b96d7d35f5c6cc8f73b5294d4c5 Mon Sep 17 00:00:00 2001
From: Terji Petersen <terji78@gmail.com>
Date: Mon, 17 Apr 2023 17:00:04 +0100
Subject: [PATCH 1/7] ENH: better dtype inference when doing DataFrame
 reductions

---
 pandas/core/frame.py                  |  28 ++++----
 pandas/tests/frame/test_reductions.py | 100 +++++++++++++++++++++++++-
 pandas/tests/groupby/test_apply.py    |   2 +-
 3 files changed, 113 insertions(+), 17 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 5341b87c39676..0b060007bed06 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -94,6 +94,7 @@
     is_dataclass,
     is_dict_like,
     is_dtype_equal,
+    is_extension_array_dtype,
     is_float,
     is_float_dtype,
     is_hashable,
@@ -10899,14 +10900,23 @@ def _get_data() -> DataFrame:
         #  simple case where we can use BlockManager.reduce
         res = df._mgr.reduce(blk_func)
         out = df._constructor(res).iloc[0]
+        mgr_dtypes = df._mgr.get_dtypes().tolist()
+        if out.dtype != object:
+            # e.g. if data dtype is UInt8 and out.dtype is uint64, then common is UInt64
+            mgr_dtypes.append(out.dtype)
+        common_dtype = find_common_type(mgr_dtypes) if mgr_dtypes else None
+        is_ext_dtype = common_dtype is not None and is_extension_array_dtype(
+            common_dtype
+        )
+
         if out_dtype is not None:
             out = out.astype(out_dtype)
+        elif is_ext_dtype and out.dtype == common_dtype.type:
+            out = out.astype(common_dtype)
+        elif out.dtype == object and isna(out).all():
+            out = out.astype(common_dtype)
         elif (df._mgr.get_dtypes() == object).any():
             out = out.astype(object)
-        elif len(self) == 0 and name in ("sum", "prod"):
-            # Even if we are object dtype, follow numpy and return
-            #  float64, see test_apply_funcs_over_empty
-            out = out.astype(np.float64)
 
         return out
 
@@ -11157,11 +11167,6 @@ def idxmin(
         )
         indices = res._values
 
-        # indices will always be np.ndarray since axis is not None and
-        # values is a 2d array for DataFrame
-        # error: Item "int" of "Union[int, Any]" has no attribute "__iter__"
-        assert isinstance(indices, np.ndarray)  # for mypy
-
         index = data._get_axis(axis)
         result = [index[i] if i >= 0 else np.nan for i in indices]
         final_result = data._constructor_sliced(result, index=data._get_agg_axis(axis))
@@ -11182,11 +11187,6 @@ def idxmax(
         )
         indices = res._values
 
-        # indices will always be np.ndarray since axis is not None and
-        # values is a 2d array for DataFrame
-        # error: Item "int" of "Union[int, Any]" has no attribute "__iter__"
-        assert isinstance(indices, np.ndarray)  # for mypy
-
         index = data._get_axis(axis)
         result = [index[i] if i >= 0 else np.nan for i in indices]
         final_result = data._constructor_sliced(result, index=data._get_agg_axis(axis))
diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py
index 0d352b8e34f37..75c3f81cb7560 100644
--- a/pandas/tests/frame/test_reductions.py
+++ b/pandas/tests/frame/test_reductions.py
@@ -917,7 +917,7 @@ def test_mean_extensionarray_numeric_only_true(self):
         arr = np.random.randint(1000, size=(10, 5))
         df = DataFrame(arr, dtype="Int64")
         result = df.mean(numeric_only=True)
-        expected = DataFrame(arr).mean()
+        expected = DataFrame(arr, dtype="Float64").mean()
         tm.assert_series_equal(result, expected)
 
     def test_stats_mixed_type(self, float_string_frame):
@@ -1544,6 +1544,100 @@ def test_reduction_timedelta_smallest_unit(self):
         tm.assert_series_equal(result, expected)
 
 
+class TestEmptyDataFrameReductions:
+    @pytest.mark.parametrize(
+        "opname, dtype, exp_value, exp_dtype",
+        [
+            ("sum", np.int8, 0, np.int64),
+            ("prod", np.int8, 1, np.int64),
+            ("sum", np.int64, 0, np.int64),
+            ("prod", np.int64, 1, np.int64),
+            ("sum", np.uint8, 0, np.int64),
+            ("prod", np.uint8, 1, np.uint64),
+            ("sum", np.uint64, 0, np.int64),
+            ("prod", np.uint64, 1, np.uint64),
+            ("sum", np.float32, 0, np.float32),
+            ("prod", np.float32, 1, np.float32),
+            ("sum", np.float64, 0, np.float64),
+        ],
+    )
+    def test_df_empty_min_count_0(self, opname, dtype, exp_value, exp_dtype):
+        df = DataFrame({0: [], 1: []}, dtype=dtype)
+        result = getattr(df, opname)(min_count=0)
+
+        expected = Series([exp_value, exp_value], dtype=exp_dtype)
+        tm.assert_series_equal(result, expected)
+
+    @pytest.mark.parametrize(
+        "opname, dtype, exp_dtype",
+        [
+            ("sum", np.int8, np.float64),
+            ("prod", np.int8, np.float64),
+            ("sum", np.int64, np.float64),
+            ("prod", np.int64, np.float64),
+            ("sum", np.uint8, np.float64),
+            ("prod", np.uint8, np.float64),
+            ("sum", np.uint64, np.float64),
+            ("prod", np.uint64, np.float64),
+            ("sum", np.float32, np.float32),
+            ("prod", np.float32, np.float32),
+            ("sum", np.float64, np.float64),
+        ],
+    )
+    def test_df_empty_min_count_1(self, opname, dtype, exp_dtype):
+        df = DataFrame({0: [], 1: []}, dtype=dtype)
+        result = getattr(df, opname)(min_count=1)
+
+        expected = Series([np.nan, np.nan], dtype=exp_dtype)
+        tm.assert_series_equal(result, expected)
+
+    @pytest.mark.parametrize(
+        "opname, dtype, exp_value, exp_dtype",
+        [
+            ("sum", "Int8", 0, "Int64"),
+            ("prod", "Int8", 1, "Int64"),
+            ("sum", "Int64", 0, "Int64"),
+            ("prod", "Int64", 1, "Int64"),
+            ("sum", "UInt8", 0, "UInt64"),
+            ("prod", "UInt8", 1, "UInt64"),
+            ("sum", "UInt64", 0, "UInt64"),
+            ("prod", "UInt64", 1, "UInt64"),
+            ("sum", "Float32", 0, "Float32"),
+            ("prod", "Float32", 1, "Float32"),
+            ("sum", "Float64", 0, "Float64"),
+        ],
+    )
+    def test_df_empty_nullable_min_count_0(self, opname, dtype, exp_value, exp_dtype):
+        df = DataFrame({0: [], 1: []}, dtype=dtype)
+        result = getattr(df, opname)(min_count=0)
+
+        expected = Series([exp_value, exp_value], dtype=exp_dtype)
+        tm.assert_series_equal(result, expected)
+
+    @pytest.mark.parametrize(
+        "opname, dtype, exp_dtype",
+        [
+            ("sum", "Int8", "Int8"),
+            ("prod", "Int8", "Int8"),
+            ("sum", "Int64", "Int64"),
+            ("prod", "Int64", "Int64"),
+            ("sum", "UInt8", "UInt8"),
+            ("prod", "UInt8", "UInt8"),
+            ("sum", "UInt64", "UInt64"),
+            ("prod", "UInt64", "UInt64"),
+            ("sum", "Float32", "Float32"),
+            ("prod", "Float32", "Float32"),
+            ("sum", "Float64", "Float64"),
+        ],
+    )
+    def test_df_empty_nullable_min_count_1(self, opname, dtype, exp_dtype):
+        df = DataFrame({0: [], 1: []}, dtype=dtype)
+        result = getattr(df, opname)(min_count=1)
+
+        expected = Series([pd.NA, pd.NA], dtype=exp_dtype)
+        tm.assert_series_equal(result, expected)
+
+
 class TestNuisanceColumns:
     @pytest.mark.parametrize("method", ["any", "all"])
     def test_any_all_categorical_dtype_nuisance_column(self, method):
@@ -1678,7 +1772,9 @@ def test_minmax_extensionarray(method, numeric_only):
     df = DataFrame({"Int64": ser})
     result = getattr(df, method)(numeric_only=numeric_only)
     expected = Series(
-        [getattr(int64_info, method)], index=Index(["Int64"], dtype="object")
+        [getattr(int64_info, method)],
+        index=Index(["Int64"], dtype="object"),
+        dtype=pd.Int64Dtype(),
     )
     tm.assert_series_equal(result, expected)
 
diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py
index e5599d60b4f0d..aaedf00932345 100644
--- a/pandas/tests/groupby/test_apply.py
+++ b/pandas/tests/groupby/test_apply.py
@@ -945,7 +945,7 @@ def test_apply_multi_level_name(category):
         b = pd.Categorical(b, categories=[1, 2, 3])
         expected_index = pd.CategoricalIndex([1, 2, 3], categories=[1, 2, 3], name="B")
-        # GH#40669 - summing an empty frame gives float dtype
-        expected_values = [20.0, 25.0, 0.0]
+        # GH#40669 - summing an empty frame no longer upcasts to float
+        expected_values = [20, 25, 0]
     else:
         expected_index = Index([1, 2], name="B")
         expected_values = [20, 25]

From e399af3d78f9d7e784894a1c91db619eb28f260e Mon Sep 17 00:00:00 2001
From: Terji Petersen <terji78@gmail.com>
Date: Mon, 17 Apr 2023 18:55:21 +0100
Subject: [PATCH 2/7] ENH: Better dtype inference when doing reductions on
 dataframes of nullable arrays

---
 doc/source/whatsnew/v2.1.0.rst | 35 ++++++++++++++++++++++++++++++----
 pandas/core/frame.py           | 13 +++++++++++--
 2 files changed, 42 insertions(+), 6 deletions(-)

diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst
index 4c1399a0defe7..f11675a78d518 100644
--- a/doc/source/whatsnew/v2.1.0.rst
+++ b/doc/source/whatsnew/v2.1.0.rst
@@ -14,12 +14,39 @@ including other versions of pandas.
 Enhancements
 ~~~~~~~~~~~~
 
-.. _whatsnew_210.enhancements.enhancement1:
+.. _whatsnew_210.enhancements.better_dtype_inference_for_frame_reductions:
+
+Better dtype inference when doing reductions on dataframes of nullable arrays
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Dtype inference when doing reductions on DataFrames with nullable arrays has been improved.
+
+*Previous behavior*:
+
+.. code-block:: ipython
+
+   In [1]: df = pd.DataFrame({"a": [1], "b": [pd.NA]}, dtype="Int64")
+   In [2]: df.sum()
+   a    1
+   b    0
+   dtype: int64
+   In [3]: df.sum(min_count=1)
+   a       1
+   b    <NA>
+   dtype: object
+
+With the new behavior, the result keeps a nullable dtype (here ``Int64``):
+
+*New behavior*:
+
+.. ipython:: python
+
+   df = pd.DataFrame({"a": [1], "b": [pd.NA]}, dtype="Int64")
+   df.sum()
+   df.sum(min_count=1)
 
-enhancement1
-^^^^^^^^^^^^
 
-.. _whatsnew_210.enhancements.enhancement2:
+.. _whatsnew_210.enhancements.map_works_for_all_array_types:
 
 ``map(func, na_action="ignore")`` now works for all array types
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 0b060007bed06..63ec449a07633 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -51,6 +51,7 @@
     NoDefault,
     is_range_indexer,
     no_default,
+    infer_dtype,
 )
 from pandas.compat import PYPY
 from pandas.compat._optional import import_optional_dependency
@@ -106,6 +107,8 @@
     is_sequence,
     needs_i8_conversion,
     pandas_dtype,
+    is_unsigned_integer_dtype,
+    is_signed_integer_dtype,
 )
 from pandas.core.dtypes.dtypes import ExtensionDtype
 from pandas.core.dtypes.missing import (
@@ -10913,10 +10916,16 @@ def _get_data() -> DataFrame:
             out = out.astype(out_dtype)
         elif is_ext_dtype and out.dtype == common_dtype.type:
             out = out.astype(common_dtype)
-        elif out.dtype == object and isna(out).all():
-            out = out.astype(common_dtype)
         elif (df._mgr.get_dtypes() == object).any():
             out = out.astype(object)
+        elif is_ext_dtype and out.dtype == object:
+            inferred_dtype = infer_dtype(out)
+            if isna(out).all():
+                out = out.astype(common_dtype)
+            elif inferred_dtype == "integer":
+                out = out.astype("Int64")
+            elif inferred_dtype == "float":
+                out = out.astype("Float64")
 
         return out
 

From 7864b034225261f82727014436c189a5a11109d3 Mon Sep 17 00:00:00 2001
From: Terji Petersen <terji78@gmail.com>
Date: Mon, 17 Apr 2023 19:10:25 +0100
Subject: [PATCH 3/7] add issue number

---
 doc/source/whatsnew/v2.1.0.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst
index f11675a78d518..e434d1b3ac3e9 100644
--- a/doc/source/whatsnew/v2.1.0.rst
+++ b/doc/source/whatsnew/v2.1.0.rst
@@ -19,7 +19,7 @@ Enhancements
 Better dtype inference when doing reductions on dataframes of nullable arrays
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-Dtype inference when doing reductions on DataFrames with nullable arrays has been improved.
+Dtype inference when doing reductions on DataFrames with nullable arrays has been improved (:issue:`52707`).
 
 *Previous behavior*:
 

From 4f386349e7c01e03052b99879cdfcb1967760ec5 Mon Sep 17 00:00:00 2001
From: Terji Petersen <terji78@gmail.com>
Date: Mon, 17 Apr 2023 19:22:16 +0100
Subject: [PATCH 4/7] CLN: sort infer_dtype import, drop unused dtype imports

---
 pandas/core/frame.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 63ec449a07633..2de7eb923b841 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -49,9 +49,9 @@
 from pandas._libs.hashtable import duplicated
 from pandas._libs.lib import (
     NoDefault,
+    infer_dtype,
     is_range_indexer,
     no_default,
-    infer_dtype,
 )
 from pandas.compat import PYPY
 from pandas.compat._optional import import_optional_dependency
@@ -107,8 +107,6 @@
     is_sequence,
     needs_i8_conversion,
     pandas_dtype,
-    is_unsigned_integer_dtype,
-    is_signed_integer_dtype,
 )
 from pandas.core.dtypes.dtypes import ExtensionDtype
 from pandas.core.dtypes.missing import (

From 60f0b1eeb4bec7f77fc88cca64c9724483ea5673 Mon Sep 17 00:00:00 2001
From: Terji Petersen <terji78@gmail.com>
Date: Mon, 17 Apr 2023 23:21:17 +0100
Subject: [PATCH 5/7] TST: use platform-dependent expected dtypes in tests

---
 pandas/tests/frame/test_reductions.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py
index 75c3f81cb7560..73d72af490f02 100644
--- a/pandas/tests/frame/test_reductions.py
+++ b/pandas/tests/frame/test_reductions.py
@@ -6,7 +6,7 @@
 import numpy as np
 import pytest
 
-from pandas.compat import is_platform_windows
+from pandas.compat import is_platform_windows, IS64
 import pandas.util._test_decorators as td
 
 import pandas as pd
@@ -1549,11 +1549,11 @@ class TestEmptyDataFrameReductions:
         "opname, dtype, exp_value, exp_dtype",
         [
             ("sum", np.int8, 0, np.int64),
-            ("prod", np.int8, 1, np.int64),
+            ("prod", np.int8, 1, np.int_),
             ("sum", np.int64, 0, np.int64),
             ("prod", np.int64, 1, np.int64),
             ("sum", np.uint8, 0, np.int64),
-            ("prod", np.uint8, 1, np.uint64),
+            ("prod", np.uint8, 1, np.uint),
             ("sum", np.uint64, 0, np.int64),
             ("prod", np.uint64, 1, np.uint64),
             ("sum", np.float32, 0, np.float32),
@@ -1594,12 +1594,12 @@ def test_df_empty_min_count_1(self, opname, dtype, exp_dtype):
     @pytest.mark.parametrize(
         "opname, dtype, exp_value, exp_dtype",
         [
-            ("sum", "Int8", 0, "Int64"),
-            ("prod", "Int8", 1, "Int64"),
+            ("sum", "Int8", 0, ("Int64" if IS64 else "Int32")),
+            ("prod", "Int8", 1, ("Int64" if IS64 else "Int32")),
             ("sum", "Int64", 0, "Int64"),
             ("prod", "Int64", 1, "Int64"),
-            ("sum", "UInt8", 0, "UInt64"),
-            ("prod", "UInt8", 1, "UInt64"),
+            ("sum", "UInt8", 0, ("UInt64" if IS64 else "UInt32")),
+            ("prod", "UInt8", 1, ("UInt64" if IS64 else "UInt32")),
             ("sum", "UInt64", 0, "UInt64"),
             ("prod", "UInt64", 1, "UInt64"),
             ("sum", "Float32", 0, "Float32"),

From 23093f912bb69786616961532fc9b60aa6e40a8e Mon Sep 17 00:00:00 2001
From: Terji Petersen <terji78@gmail.com>
Date: Mon, 17 Apr 2023 23:29:39 +0100
Subject: [PATCH 6/7] isort

---
 pandas/tests/frame/test_reductions.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py
index 73d72af490f02..ebc0b402a6619 100644
--- a/pandas/tests/frame/test_reductions.py
+++ b/pandas/tests/frame/test_reductions.py
@@ -6,7 +6,10 @@
 import numpy as np
 import pytest
 
-from pandas.compat import is_platform_windows, IS64
+from pandas.compat import (
+    IS64,
+    is_platform_windows,
+)
 import pandas.util._test_decorators as td
 
 import pandas as pd

From 1bd00e4da7856b348162dd0a2f0476d7e2700854 Mon Sep 17 00:00:00 2001
From: tp <contribute@tensortable.com>
Date: Tue, 18 Apr 2023 08:33:52 +0100
Subject: [PATCH 7/7] TST: expect 32-bit dtypes on Windows as well

---
 pandas/tests/frame/test_reductions.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py
index ebc0b402a6619..dee8744388678 100644
--- a/pandas/tests/frame/test_reductions.py
+++ b/pandas/tests/frame/test_reductions.py
@@ -32,6 +32,8 @@
     nanops,
 )
 
+is_windows_or_is32 = is_platform_windows() or not IS64
+
 
 def assert_stat_op_calc(
     opname,
@@ -1597,12 +1599,12 @@ def test_df_empty_min_count_1(self, opname, dtype, exp_dtype):
     @pytest.mark.parametrize(
         "opname, dtype, exp_value, exp_dtype",
         [
-            ("sum", "Int8", 0, ("Int64" if IS64 else "Int32")),
-            ("prod", "Int8", 1, ("Int64" if IS64 else "Int32")),
+            ("sum", "Int8", 0, ("Int32" if is_windows_or_is32 else "Int64")),
+            ("prod", "Int8", 1, ("Int32" if is_windows_or_is32 else "Int64")),
             ("sum", "Int64", 0, "Int64"),
             ("prod", "Int64", 1, "Int64"),
-            ("sum", "UInt8", 0, ("UInt64" if IS64 else "UInt32")),
-            ("prod", "UInt8", 1, ("UInt64" if IS64 else "UInt32")),
+            ("sum", "UInt8", 0, ("UInt32" if is_windows_or_is32 else "UInt64")),
+            ("prod", "UInt8", 1, ("UInt32" if is_windows_or_is32 else "UInt64")),
             ("sum", "UInt64", 0, "UInt64"),
             ("prod", "UInt64", 1, "UInt64"),
             ("sum", "Float32", 0, "Float32"),