From 84c78c3a5960bc1076e2e7a0aed92f1c5357a742 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 29 Jul 2025 10:54:32 -0700 Subject: [PATCH] BUG/DEPR: logical operation with bool and string --- doc/source/whatsnew/v2.3.2.rst | 2 +- pandas/core/arrays/arrow/array.py | 21 +++++++++++++++++++++ pandas/core/arrays/string_.py | 21 +++++++++++++++++++++ pandas/tests/strings/test_strings.py | 24 ++++++++++++++++++++++++ 4 files changed, 67 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.3.2.rst b/doc/source/whatsnew/v2.3.2.rst index 03244c808ad03..53a8d28687518 100644 --- a/doc/source/whatsnew/v2.3.2.rst +++ b/doc/source/whatsnew/v2.3.2.rst @@ -25,7 +25,7 @@ Bug fixes - Fix :meth:`~DataFrame.to_json` with ``orient="table"`` to correctly use the "string" type in the JSON Table Schema for :class:`StringDtype` columns (:issue:`61889`) - +- Boolean operations (``|``, ``&``, ``^``) with bool-dtype objects on the left and :class:`StringDtype` objects on the right now cast the string to bool, with a deprecation warning (:issue:`60234`) .. --------------------------------------------------------------------------- .. _whatsnew_232.contributors: diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 919453b29b7f9..ad1d576bfec32 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -12,6 +12,7 @@ overload, ) import unicodedata +import warnings import numpy as np @@ -27,6 +28,7 @@ pa_version_under13p0, ) from pandas.util._decorators import doc +from pandas.util._exceptions import find_stack_level from pandas.core.dtypes.cast import ( can_hold_element, @@ -852,6 +854,25 @@ def _logical_method(self, other, op) -> Self: # integer types. Otherwise these are boolean ops. if pa.types.is_integer(self._pa_array.type): return self._evaluate_op_method(other, op, ARROW_BIT_WISE_FUNCS) + elif ( + ( + pa.types.is_string(self._pa_array.type) + or pa.types.is_large_string(self._pa_array.type) + ) + and op in (roperator.ror_, roperator.rand_, roperator.rxor) + and isinstance(other, np.ndarray) + and other.dtype == bool + ): + # GH#60234 backward compatibility for the move to StringDtype in 3.0 + op_name = op.__name__[1:].strip("_") + warnings.warn( + f"'{op_name}' operations between boolean dtype and {self.dtype} are " + "deprecated and will raise in a future version. Explicitly " + "cast the strings to a boolean dtype before operating instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + return op(other, self.astype(bool)) else: return self._evaluate_op_method(other, op, ARROW_LOGICAL_FUNCS) diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index f52b709a59de9..198dc4c483277 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -52,6 +52,7 @@ missing, nanops, ops, + roperator, ) from pandas.core.algorithms import isin from pandas.core.array_algos import masked_reductions @@ -390,6 +391,26 @@ class BaseStringArray(ExtensionArray): dtype: StringDtype + # TODO(4.0): Once the deprecation here is enforced, this method can be + # removed and we use the parent class method instead. + def _logical_method(self, other, op): + if ( + op in (roperator.ror_, roperator.rand_, roperator.rxor) + and isinstance(other, np.ndarray) + and other.dtype == bool + ): + # GH#60234 backward compatibility for the move to StringDtype in 3.0 + op_name = op.__name__[1:].strip("_") + warnings.warn( + f"'{op_name}' operations between boolean dtype and {self.dtype} are " + "deprecated and will raise in a future version. Explicitly " + "cast the strings to a boolean dtype before operating instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + return op(other, self.astype(bool)) + return NotImplemented + @doc(ExtensionArray.tolist) def tolist(self) -> list: if self.ndim > 1: diff --git a/pandas/tests/strings/test_strings.py b/pandas/tests/strings/test_strings.py index 695eef06e3b9d..7083489ce7c8c 100644 --- a/pandas/tests/strings/test_strings.py +++ b/pandas/tests/strings/test_strings.py @@ -787,3 +787,27 @@ def test_decode_with_dtype_none(): result = ser.str.decode("utf-8", dtype=None) expected = Series(["a", "b", "c"], dtype="str") tm.assert_series_equal(result, expected) + + +def test_reversed_logical_ops(any_string_dtype): + # GH#60234 + dtype = any_string_dtype + warn = None if dtype == object else FutureWarning + left = Series([True, False, False, True]) + right = Series(["", "", "b", "c"], dtype=dtype) + + msg = "operations between boolean dtype and" + with tm.assert_produces_warning(warn, match=msg): + result = left | right + expected = left | right.astype(bool) + tm.assert_series_equal(result, expected) + + with tm.assert_produces_warning(warn, match=msg): + result = left & right + expected = left & right.astype(bool) + tm.assert_series_equal(result, expected) + + with tm.assert_produces_warning(warn, match=msg): + result = left ^ right + expected = left ^ right.astype(bool) + tm.assert_series_equal(result, expected)