From 33b82fb4d3fd1e0c9b917ed49ecfbd86457cc83b Mon Sep 17 00:00:00 2001
From: richard <rhshadrach@gmail.com>
Date: Mon, 27 Mar 2023 18:19:25 -0400
Subject: [PATCH 1/4] BUG: Revert GH#51335

---
 pandas/core/frame.py                  | 122 ++++++++++++++++++--------
 pandas/tests/frame/test_reductions.py |  15 +++-
 2 files changed, 97 insertions(+), 40 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index bcba7c8c13f8c..69fa44c6ebb8b 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -102,6 +102,7 @@
     is_integer_dtype,
     is_iterator,
     is_list_like,
+    is_object_dtype,
     is_scalar,
     is_sequence,
     needs_i8_conversion,
@@ -10925,44 +10926,93 @@ def _get_data() -> DataFrame:
                 data = self._get_bool_data()
             return data
 
-        # Case with EAs see GH#35881
-        df = self
-        if numeric_only:
-            df = _get_data()
+        if numeric_only or axis == 0:
+            # For numeric_only non-None and axis non-None, we know
+            #  which blocks to use and no try/except is needed.
+            #  For numeric_only=None, only the unambiguous case with axis==0
+            #  and no object dtypes can be handled with BlockManager.reduce
+            # Case with EAs see GH#35881
+            df = self
+            if numeric_only:
+                df = _get_data()
+            if axis == 1:
+                df = df.T
+                axis = 0
+
+            # After possibly _get_data and transposing, we are now in the
+            #  simple case where we can use BlockManager.reduce
+            res = df._mgr.reduce(blk_func)
+            out = df._constructor(res).iloc[0]
+            if out_dtype is not None:
+                out = out.astype(out_dtype)
+            if axis == 0 and len(self) == 0 and name in ["sum", "prod"]:
+                # Even if we are object dtype, follow numpy and return
+                #  float64, see test_apply_funcs_over_empty
+                out = out.astype(np.float64)
+
+            return out
+
+        assert not numeric_only and axis in (1, None)
+
+        data = self
+        values = data.values
+        result = func(values)
+
+        if hasattr(result, "dtype"):
+            if filter_type == "bool" and notna(result).all():
+                result = result.astype(np.bool_)
+            elif filter_type is None and is_object_dtype(result.dtype):
+                try:
+                    result = result.astype(np.float64)
+                except (ValueError, TypeError):
+                    # cannot be cast to float64; keep the object-dtype result as-is
+                    pass
+
         if axis is None:
-            return func(df.values)
-        elif axis == 1:
-            if len(df.index) == 0:
-                # Taking a transpose would result in no columns, losing the dtype.
-                # In the empty case, reducing along axis 0 or 1 gives the same
-                # result dtype, so reduce with axis=0 and ignore values
-                result = df._reduce(
-                    op,
-                    name,
-                    axis=0,
-                    skipna=skipna,
-                    numeric_only=False,
-                    filter_type=filter_type,
-                    **kwds,
-                ).iloc[:0]
-                result.index = df.index
-                return result
-            df = df.T
-
-        # After possibly _get_data and transposing, we are now in the
-        #  simple case where we can use BlockManager.reduce
-        res = df._mgr.reduce(blk_func)
-        out = df._constructor(res).iloc[0]
-        if out_dtype is not None:
-            out = out.astype(out_dtype)
-        elif (df._mgr.get_dtypes() == object).any():
-            out = out.astype(object)
-        elif len(self) == 0 and name in ("sum", "prod"):
-            # Even if we are object dtype, follow numpy and return
-            #  float64, see test_apply_funcs_over_empty
-            out = out.astype(np.float64)
+            return result
 
-        return out
+        labels = self._get_agg_axis(axis)
+        result = self._constructor_sliced(result, index=labels)
+        return result
+
+        # # Case with EAs see GH#35881
+        # df = self
+        # if numeric_only:
+        #     df = _get_data()
+        # if axis is None:
+        #     return func(df.values)
+        # elif axis == 1:
+        #     if len(df.index) == 0:
+        #         # Taking a transpose would result in no columns, losing the dtype.
+        #         # In the empty case, reducing along axis 0 or 1 gives the same
+        #         # result dtype, so reduce with axis=0 and ignore values
+        #         result = df._reduce(
+        #             op,
+        #             name,
+        #             axis=0,
+        #             skipna=skipna,
+        #             numeric_only=False,
+        #             filter_type=filter_type,
+        #             **kwds,
+        #         ).iloc[:0]
+        #         result.index = df.index
+        #         return result
+        #     df = df.T
+        #
+        # # After possibly _get_data and transposing, we are now in the
+        # #  simple case where we can use BlockManager.reduce
+        # res = df._mgr.reduce(blk_func)
+        # out = df._constructor(res).iloc[0]
+        # if out_dtype is not None:
+        #     out = out.astype(out_dtype)
+        # elif (df._mgr.get_dtypes() == object).any():
+        #     out = out.astype(object)
+        # elif len(self) == 0 and name in ("sum", "prod"):
+        #     # Even if we are object dtype, follow numpy and return
+        #     #  float64, see test_apply_funcs_over_empty
+        #     out = out.astype(np.float64)
+        #
+        # return out
 
     def _reduce_axis1(self, name: str, func, skipna: bool) -> Series:
         """
diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py
index 28809e2ecb788..b57337ebeff76 100644
--- a/pandas/tests/frame/test_reductions.py
+++ b/pandas/tests/frame/test_reductions.py
@@ -317,8 +317,10 @@ def wrapper(x):
             DataFrame({0: [np.nan, 2], 1: [np.nan, 3], 2: [np.nan, 4]}, dtype=object),
         ],
     )
-    def test_stat_operators_attempt_obj_array(self, method, df, axis):
+    def test_stat_operators_attempt_obj_array(self, method, df, axis, request):
         # GH#676
+        if axis in (1, "columns") or method not in ("sum", "prod", "min", "max"):
+            request.node.add_marker(pytest.mark.xfail(reason="Revert of GH#51335"))
         assert df.values.dtype == np.object_
         result = getattr(df, method)(axis=axis)
         expected = getattr(df.astype("f8"), method)(axis=axis).astype(object)
@@ -402,6 +404,7 @@ def test_mean_includes_datetimes(self, tz):
         expected = Series([Timestamp("2000", tz=tz)], index=["A"])
         tm.assert_series_equal(result, expected)
 
+    @pytest.mark.xfail(reason="Revert of GH#51335")
     def test_mean_mixed_string_decimal(self):
         # GH 11670
         # possible bug when calculating mean of DataFrame?
@@ -731,7 +734,9 @@ def test_sum_corner(self):
             tm.makePeriodIndex(0),
         ],
     )
-    def test_axis_1_empty(self, all_reductions, index, using_array_manager):
+    def test_axis_1_empty(self, all_reductions, index, using_array_manager, request):
+        if all_reductions not in ("count", "any", "all"):
+            request.node.add_marker(pytest.mark.xfail(reason="Revert of GH#51335"))
         df = DataFrame(columns=["a"], index=index)
         result = getattr(df, all_reductions)(axis=1)
         if all_reductions in ("any", "all"):
@@ -1464,6 +1469,7 @@ def test_preserve_timezone(self, initial: str, method):
         result = getattr(df, method)(axis=1)
         tm.assert_series_equal(result, expected)
 
+    @pytest.mark.xfail(reason="GH#51335")
     @pytest.mark.parametrize("method", ["min", "max"])
     def test_minmax_tzaware_skipna_axis_1(self, method, skipna):
         # GH#51242
@@ -1671,9 +1677,10 @@ def test_prod_sum_min_count_mixed_object():
 
 @pytest.mark.parametrize("method", ["min", "max", "mean", "median", "skew", "kurt"])
 @pytest.mark.parametrize("numeric_only", [True, False])
-def test_reduction_axis_none_returns_scalar(method, numeric_only):
+def test_reduction_axis_none_returns_scalar(method, numeric_only, request):
     # GH#21597 As of 2.0, axis=None reduces over all axes.
-
+    if numeric_only:
+        request.node.add_marker(pytest.mark.xfail(reason="Revert of GH#51335"))
     df = DataFrame(np.random.randn(4, 4))
 
     result = getattr(df, method)(axis=None, numeric_only=numeric_only)

From 5cd80c4f4800767cb91d8a1831bac38067543390 Mon Sep 17 00:00:00 2001
From: richard <rhshadrach@gmail.com>
Date: Mon, 27 Mar 2023 18:34:26 -0400
Subject: [PATCH 2/4] cleanup

---
 pandas/core/frame.py | 39 ---------------------------------------
 1 file changed, 39 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 69fa44c6ebb8b..96048a454819f 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -10975,45 +10975,6 @@ def _get_data() -> DataFrame:
         result = self._constructor_sliced(result, index=labels)
         return result
 
-        # # Case with EAs see GH#35881
-        # df = self
-        # if numeric_only:
-        #     df = _get_data()
-        # if axis is None:
-        #     return func(df.values)
-        # elif axis == 1:
-        #     if len(df.index) == 0:
-        #         # Taking a transpose would result in no columns, losing the dtype.
-        #         # In the empty case, reducing along axis 0 or 1 gives the same
-        #         # result dtype, so reduce with axis=0 and ignore values
-        #         result = df._reduce(
-        #             op,
-        #             name,
-        #             axis=0,
-        #             skipna=skipna,
-        #             numeric_only=False,
-        #             filter_type=filter_type,
-        #             **kwds,
-        #         ).iloc[:0]
-        #         result.index = df.index
-        #         return result
-        #     df = df.T
-        #
-        # # After possibly _get_data and transposing, we are now in the
-        # #  simple case where we can use BlockManager.reduce
-        # res = df._mgr.reduce(blk_func)
-        # out = df._constructor(res).iloc[0]
-        # if out_dtype is not None:
-        #     out = out.astype(out_dtype)
-        # elif (df._mgr.get_dtypes() == object).any():
-        #     out = out.astype(object)
-        # elif len(self) == 0 and name in ("sum", "prod"):
-        #     # Even if we are object dtype, follow numpy and return
-        #     #  float64, see test_apply_funcs_over_empty
-        #     out = out.astype(np.float64)
-        #
-        # return out
-
     def _reduce_axis1(self, name: str, func, skipna: bool) -> Series:
         """
         Special case for _reduce to try to avoid a potentially-expensive transpose.

From dbf63e358791ba06bfacfac5c336951205336399 Mon Sep 17 00:00:00 2001
From: Richard Shadrach <rhshadrach@gmail.com>
Date: Tue, 28 Mar 2023 17:06:12 -0400
Subject: [PATCH 3/4] revert whatsnew

---
 doc/source/whatsnew/v2.0.0.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst
index 80b52d3b3955e..02d6a3c4312cc 100644
--- a/doc/source/whatsnew/v2.0.0.rst
+++ b/doc/source/whatsnew/v2.0.0.rst
@@ -777,7 +777,7 @@ Other API changes
 - The levels of the index of the :class:`Series` returned from ``Series.sparse.from_coo`` now always have dtype ``int32``. Previously they had dtype ``int64`` (:issue:`50926`)
 - :func:`to_datetime` with ``unit`` of either "Y" or "M" will now raise if a sequence contains a non-round ``float`` value, matching the ``Timestamp`` behavior (:issue:`50301`)
 - The methods :meth:`Series.round`, :meth:`DataFrame.__invert__`, :meth:`Series.__invert__`, :meth:`DataFrame.swapaxes`, :meth:`DataFrame.first`, :meth:`DataFrame.last`, :meth:`Series.first`, :meth:`Series.last` and :meth:`DataFrame.align` will now always return new objects (:issue:`51032`)
-- :class:`DataFrame` and :class:`DataFrameGroupBy` aggregations (e.g. "sum") with object-dtype columns no longer infer non-object dtypes for their results, explicitly call ``result.infer_objects(copy=False)`` on the result to obtain the old behavior (:issue:`51205`, :issue:`49603`)
+- :class:`DataFrameGroupBy` aggregations (e.g. "sum") with object-dtype columns no longer infer non-object dtypes for their results, explicitly call ``result.infer_objects(copy=False)`` on the result to obtain the old behavior (:issue:`51205`, :issue:`49603`)
 - Division by zero with :class:`ArrowDtype` dtypes returns ``-inf``, ``nan``, or ``inf`` depending on the numerator, instead of raising (:issue:`51541`)
 - Added :func:`pandas.api.types.is_any_real_numeric_dtype` to check for real numeric dtypes (:issue:`51152`)
 - :meth:`~arrays.ArrowExtensionArray.value_counts` now returns data with :class:`ArrowDtype` with ``pyarrow.int64`` type instead of ``"Int64"`` type (:issue:`51462`)
@@ -1204,11 +1204,11 @@ Numeric
 ^^^^^^^
 - Bug in :meth:`DataFrame.add` cannot apply ufunc when inputs contain mixed DataFrame type and Series type (:issue:`39853`)
 - Bug in arithmetic operations on :class:`Series` not propagating mask when combining masked dtypes and numpy dtypes (:issue:`45810`, :issue:`42630`)
+- Bug in :class:`DataFrame` reduction methods (e.g. :meth:`DataFrame.sum`) with object dtype, ``axis=1`` and ``numeric_only=False`` where the result would not be coerced to float (:issue:`49551`)
 - Bug in :meth:`DataFrame.sem` and :meth:`Series.sem` where an erroneous ``TypeError`` would always raise when using data backed by an :class:`ArrowDtype` (:issue:`49759`)
 - Bug in :meth:`Series.__add__` casting to object for list and masked :class:`Series` (:issue:`22962`)
 - Bug in :meth:`~arrays.ArrowExtensionArray.mode` where ``dropna=False`` was not respected when there was ``NA`` values (:issue:`50982`)
 - Bug in :meth:`DataFrame.query` with ``engine="numexpr"`` and column names are ``min`` or ``max`` would raise a ``TypeError`` (:issue:`50937`)
-- Bug in :meth:`DataFrame.min` and :meth:`DataFrame.max` with tz-aware data containing ``pd.NaT`` and ``axis=1`` would return incorrect results (:issue:`51242`)
 
 Conversion
 ^^^^^^^^^^

From cb43a0963f5f9ee05148cad75f3628d33253309f Mon Sep 17 00:00:00 2001
From: Richard Shadrach <rhshadrach@gmail.com>
Date: Tue, 28 Mar 2023 17:13:40 -0400
Subject: [PATCH 4/4] xfail for array manager

---
 pandas/tests/frame/test_reductions.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py
index b57337ebeff76..b40ba4bf48eaa 100644
--- a/pandas/tests/frame/test_reductions.py
+++ b/pandas/tests/frame/test_reductions.py
@@ -317,9 +317,15 @@ def wrapper(x):
             DataFrame({0: [np.nan, 2], 1: [np.nan, 3], 2: [np.nan, 4]}, dtype=object),
         ],
     )
-    def test_stat_operators_attempt_obj_array(self, method, df, axis, request):
+    def test_stat_operators_attempt_obj_array(
+        self, method, df, axis, request, using_array_manager
+    ):
         # GH#676
-        if axis in (1, "columns") or method not in ("sum", "prod", "min", "max"):
+        if (
+            axis in (1, "columns")
+            or method not in ("sum", "prod", "min", "max")
+            or using_array_manager
+        ):
             request.node.add_marker(pytest.mark.xfail(reason="Revert of GH#51335"))
         assert df.values.dtype == np.object_
         result = getattr(df, method)(axis=axis)