From 4e84f40c45ecaf6ac73ffbc2f278b1852cf38dd4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Simon=20H=C3=B8xbro=20Hansen?= Date: Thu, 11 May 2023 12:27:08 +0200 Subject: [PATCH 1/7] Add np.intc to merge table --- pandas/core/reshape/merge.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index a96a08f18e81f..b54c2703f844c 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -111,6 +111,7 @@ np.int32: libhashtable.Int32Factorizer, np.int16: libhashtable.Int16Factorizer, np.int8: libhashtable.Int8Factorizer, + np.intc: libhashtable.Int64Factorizer, np.uint64: libhashtable.UInt64Factorizer, np.uint32: libhashtable.UInt32Factorizer, np.uint16: libhashtable.UInt16Factorizer, From 30a39650e14dca547b0a7b686135e39a06050b5b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Simon=20H=C3=B8xbro=20Hansen?= Date: Thu, 11 May 2023 12:30:22 +0200 Subject: [PATCH 2/7] Add whatsnew --- doc/source/whatsnew/v2.0.2.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.0.2.rst b/doc/source/whatsnew/v2.0.2.rst index 0d6647dc38b3c..c3ccbfba0fcf4 100644 --- a/doc/source/whatsnew/v2.0.2.rst +++ b/doc/source/whatsnew/v2.0.2.rst @@ -17,7 +17,7 @@ Fixed regressions - Fixed regression in :meth:`DataFrame.loc` losing :class:`MultiIndex` name when enlarging object (:issue:`53053`) - Fixed regression in :meth:`DataFrame.to_string` printing a backslash at the end of the first row of data, instead of headers, when the DataFrame doesn't fit the line width (:issue:`53054`) - Fixed regression in :meth:`MultiIndex.join` returning levels in wrong order (:issue:`53093`) -- +- Fixed regression in :func:`merge` when dtype is ``np.intc`` (:issue:`52451`) .. --------------------------------------------------------------------------- .. _whatsnew_202.bug_fixes: From 2ff9af89b0cd7817605c773df6500de857a5f3f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Simon=20H=C3=B8xbro=20Hansen?= Date: Thu, 11 May 2023 12:30:33 +0200 Subject: [PATCH 3/7] Updated unit test --- pandas/tests/reshape/merge/test_merge.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 017bf1c917e37..122e70254fb6a 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -1460,7 +1460,9 @@ def test_different(self, right_vals): result = merge(left, right, on="A") assert is_object_dtype(result.A.dtype) - @pytest.mark.parametrize("d1", [np.int64, np.int32, np.int16, np.int8, np.uint8]) + @pytest.mark.parametrize( + "d1", [np.int64, np.int32, np.intc, np.int16, np.int8, np.uint8] + ) @pytest.mark.parametrize("d2", [np.int64, np.float64, np.float32, np.float16]) def test_join_multi_dtypes(self, d1, d2): dtype1 = np.dtype(d1) From b28f3fe0a99ec78a42368e5dc754db4f2ee2bc3b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Simon=20H=C3=B8xbro=20Hansen?= Date: Thu, 11 May 2023 12:46:13 +0200 Subject: [PATCH 4/7] Fix lint --- doc/source/whatsnew/v2.0.2.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.0.2.rst b/doc/source/whatsnew/v2.0.2.rst index c3ccbfba0fcf4..a2f5175ce6782 100644 --- a/doc/source/whatsnew/v2.0.2.rst +++ b/doc/source/whatsnew/v2.0.2.rst @@ -13,11 +13,11 @@ including other versions of pandas. Fixed regressions ~~~~~~~~~~~~~~~~~ +- Fixed regression in :func:`merge` when dtype is ``np.intc`` (:issue:`52451`) - Fixed regression in :func:`read_sql` dropping columns with duplicated column names (:issue:`53117`) - Fixed regression in :meth:`DataFrame.loc` losing :class:`MultiIndex` name when enlarging object (:issue:`53053`) - Fixed regression in :meth:`DataFrame.to_string` printing a backslash at the end of the first row of data, instead of headers, when the DataFrame doesn't fit the line width (:issue:`53054`) - Fixed regression in :meth:`MultiIndex.join` returning levels in wrong order (:issue:`53093`) -- Fixed regression in :func:`merge` when dtype is ``np.intc`` (:issue:`52451`) .. --------------------------------------------------------------------------- .. _whatsnew_202.bug_fixes: From fbdf6e3dd3eec57877caa8fee814780e76fab205 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Simon=20H=C3=B8xbro=20Hansen?= Date: Mon, 15 May 2023 08:59:02 +0200 Subject: [PATCH 5/7] Only add intc to _factorizers if not int32 --- pandas/core/reshape/merge.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index f046e0d0c6745..3d1c41ecad053 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -111,7 +111,6 @@ np.int32: libhashtable.Int32Factorizer, np.int16: libhashtable.Int16Factorizer, np.int8: libhashtable.Int8Factorizer, - np.intc: libhashtable.Int64Factorizer, np.uint64: libhashtable.UInt64Factorizer, np.uint32: libhashtable.UInt32Factorizer, np.uint16: libhashtable.UInt16Factorizer, @@ -124,6 +123,9 @@ np.object_: libhashtable.ObjectFactorizer, } +if np.intc is not np.int32: + _factorizers[np.intc] = libhashtable.Int64Factorizer + _known = (np.ndarray, ExtensionArray, Index, ABCSeries) From ce3488ea72a8687a773a51541f9e0d723768944f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Simon=20H=C3=B8xbro=20Hansen?= Date: Tue, 16 May 2023 09:29:13 +0200 Subject: [PATCH 6/7] Mention that the problem is on Windows --- doc/source/whatsnew/v2.0.2.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.0.2.rst b/doc/source/whatsnew/v2.0.2.rst index 38d42a5035c35..a40eb47508609 100644 --- a/doc/source/whatsnew/v2.0.2.rst +++ b/doc/source/whatsnew/v2.0.2.rst @@ -14,7 +14,7 @@ including other versions of pandas. Fixed regressions ~~~~~~~~~~~~~~~~~ - Fixed performance regression in :meth:`GroupBy.apply` (:issue:`53195`) -- Fixed regression in :func:`merge` when dtype is ``np.intc`` (:issue:`52451`) +- Fixed regression in :func:`merge` on Windows when dtype is ``np.intc`` (:issue:`52451`) - Fixed regression in :func:`read_sql` dropping columns with duplicated column names (:issue:`53117`) - Fixed regression in :meth:`DataFrame.loc` losing :class:`MultiIndex` name when enlarging object (:issue:`53053`) - Fixed regression in :meth:`DataFrame.to_string` printing a backslash at the end of the first row of data, instead of headers, when the DataFrame doesn't fit the line width (:issue:`53054`) From 54c21bbc6c5d578dcbf3a8c7b0997c8dd0dd1450 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Simon=20H=C3=B8xbro=20Hansen?= Date: Tue, 16 May 2023 20:22:37 +0200 Subject: [PATCH 7/7] Update pandas/core/reshape/merge.py --- pandas/core/reshape/merge.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 3d1c41ecad053..8b9a498b27bbe 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -123,6 +123,7 @@ np.object_: libhashtable.ObjectFactorizer, } +# See https://github.com/pandas-dev/pandas/issues/52451 if np.intc is not np.int32: _factorizers[np.intc] = libhashtable.Int64Factorizer