From ee4a2b63c11c5cff9493c557a6e1e8bb7d93deb4 Mon Sep 17 00:00:00 2001 From: Laurent Mutricy Date: Fri, 24 May 2024 16:44:21 +0200 Subject: [PATCH 01/10] remove core.computation.ops.Div resolves #21374 #58748 --- pandas/core/computation/expr.py | 5 +--- pandas/core/computation/ops.py | 51 --------------------------------- 2 files changed, 1 insertion(+), 55 deletions(-) diff --git a/pandas/core/computation/expr.py b/pandas/core/computation/expr.py index b287cd542068d..ee5f437b05987 100644 --- a/pandas/core/computation/expr.py +++ b/pandas/core/computation/expr.py @@ -32,7 +32,6 @@ UNARY_OPS_SYMS, BinOp, Constant, - Div, FuncNode, Op, Term, @@ -374,6 +373,7 @@ class BaseExprVisitor(ast.NodeVisitor): "Add", "Sub", "Mult", + "Div", None, "Pow", "FloorDiv", @@ -537,9 +537,6 @@ def visit_BinOp(self, node, **kwargs): left, right = self._maybe_downcast_constants(left, right) return self._maybe_evaluate_binop(op, op_class, left, right) - def visit_Div(self, node, **kwargs): - return lambda lhs, rhs: Div(lhs, rhs) - def visit_UnaryOp(self, node, **kwargs): op = self.visit(node.op) operand = self.visit(node.operand) diff --git a/pandas/core/computation/ops.py b/pandas/core/computation/ops.py index 056325fd2e4ab..63029e88a0779 100644 --- a/pandas/core/computation/ops.py +++ b/pandas/core/computation/ops.py @@ -328,31 +328,6 @@ def _not_in(x, y): _binary_ops_dict.update(d) -def _cast_inplace(terms, acceptable_dtypes, dtype) -> None: - """ - Cast an expression inplace. - - Parameters - ---------- - terms : Op - The expression that should cast. - acceptable_dtypes : list of acceptable numpy.dtype - Will not cast if term's dtype in this list. - dtype : str or numpy.dtype - The dtype to cast to. - """ - dt = np.dtype(dtype) - for term in terms: - if term.type in acceptable_dtypes: - continue - - try: - new_value = term.value.astype(dt) - except AttributeError: - new_value = dt.type(term.value) - term.update(new_value) - - def is_term(obj) -> bool: return isinstance(obj, Term) @@ -509,32 +484,6 @@ def _disallow_scalar_only_bool_ops(self) -> None: raise NotImplementedError("cannot evaluate scalar only bool ops") -class Div(BinOp): - """ - Div operator to special case casting. - - Parameters - ---------- - lhs, rhs : Term or Op - The Terms or Ops in the ``/`` expression. - """ - - def __init__(self, lhs, rhs) -> None: - super().__init__("/", lhs, rhs) - - if not is_numeric_dtype(lhs.return_type) or not is_numeric_dtype( - rhs.return_type - ): - raise TypeError( - f"unsupported operand type(s) for {self.op}: " - f"'{lhs.return_type}' and '{rhs.return_type}'" - ) - - # do not upcast float32s to float64 un-necessarily - acceptable_dtypes = [np.float32, np.float64] - _cast_inplace(com.flatten(self), acceptable_dtypes, np.float64) - - UNARY_OPS_SYMS = ("+", "-", "~", "not") _unary_ops_funcs = (operator.pos, operator.neg, operator.invert, operator.invert) _unary_ops_dict = dict(zip(UNARY_OPS_SYMS, _unary_ops_funcs)) From f162a38902c3163bc1c918e20e1727e5158d0351 Mon Sep 17 00:00:00 2001 From: Laurent Mutricy Date: Tue, 4 Jun 2024 15:29:59 +0200 Subject: [PATCH 02/10] need to preserve order --- pandas/core/computation/expr.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/computation/expr.py b/pandas/core/computation/expr.py index ee5f437b05987..b074e768e0842 100644 --- a/pandas/core/computation/expr.py +++ b/pandas/core/computation/expr.py @@ -374,7 +374,6 @@ class BaseExprVisitor(ast.NodeVisitor): "Sub", "Mult", "Div", - None, "Pow", "FloorDiv", "Mod", From bffe468d829088e02bc65752d928d1c42291db3d Mon Sep 17 00:00:00 2001 From: Laurent Mutricy Date: Fri, 28 Jun 2024 23:45:14 +0200 Subject: [PATCH 03/10] updating tests --- pandas/conftest.py | 15 +++++++++++++++ pandas/core/computation/ops.py | 1 - pandas/tests/computation/test_eval.py | 23 ++++++++++++++++------- pandas/tests/frame/test_query_eval.py | 2 +- 4 files changed, 32 insertions(+), 9 deletions(-) diff --git a/pandas/conftest.py b/pandas/conftest.py index c3bfc8c06ad8a..8ca43e94e2bff 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -1448,6 +1448,21 @@ def complex_dtype(request): return request.param +@pytest.fixture(params=tm.COMPLEX_DTYPES + tm.FLOAT_NUMPY_DTYPES) +def complex_or_float_dtype(request): + """ + Parameterized fixture for complex and numpy float dtypes. + + * complex + * 'complex64' + * 'complex128' + * float + * 'float32' + * 'float64' + """ + return request.param + + @pytest.fixture(params=tm.SIGNED_INT_NUMPY_DTYPES) def any_signed_int_numpy_dtype(request): """ diff --git a/pandas/core/computation/ops.py b/pandas/core/computation/ops.py index 63029e88a0779..a1a5f77f8539e 100644 --- a/pandas/core/computation/ops.py +++ b/pandas/core/computation/ops.py @@ -18,7 +18,6 @@ from pandas.core.dtypes.common import ( is_list_like, - is_numeric_dtype, is_scalar, ) diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py index d52f33fe80434..3206cf339adb4 100644 --- a/pandas/tests/computation/test_eval.py +++ b/pandas/tests/computation/test_eval.py @@ -758,16 +758,25 @@ class TestTypeCasting: # maybe someday... numexpr has too many upcasting rules now # chain(*(np.core.sctypes[x] for x in ['uint', 'int', 'float'])) @pytest.mark.parametrize("left_right", [("df", "3"), ("3", "df")]) - def test_binop_typecasting(self, engine, parser, op, float_numpy_dtype, left_right): - df = DataFrame( - np.random.default_rng(2).standard_normal((5, 3)), dtype=float_numpy_dtype - ) + def test_binop_typecasting( + self, engine, parser, op, complex_or_float_dtype, left_right, request + ): + # GH#21374 + dtype = complex_or_float_dtype + df = DataFrame(np.random.default_rng(2).standard_normal((5, 3)), dtype=dtype) left, right = left_right s = f"{left} {op} {right}" res = pd.eval(s, engine=engine, parser=parser) - assert df.values.dtype == float_numpy_dtype - assert res.values.dtype == float_numpy_dtype - tm.assert_frame_equal(res, eval(s)) + if dtype == "complex64" and engine == "numexpr": + mark = pytest.mark.xfail( + reason="numexpr issue with complex that are upcast " + "to complex 128 " + "https://github.com/pydata/numexpr/issues/492" + ) + request.node.add_marker(mark) + assert df.values.dtype == dtype + assert res.values.dtype == dtype + tm.assert_frame_equal(res, eval(s), check_exact=False) # ------------------------------------- diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py index ff1bf5632e920..4f5c0ef44a42d 100644 --- a/pandas/tests/frame/test_query_eval.py +++ b/pandas/tests/frame/test_query_eval.py @@ -206,7 +206,7 @@ def test_extension_array_eval(self, engine, parser): # GH#58748 df = DataFrame({"a": pd.array([1, 2, 3]), "b": pd.array([4, 5, 6])}) result = df.eval("a / b", engine=engine, parser=parser) - expected = Series([0.25, 0.40, 0.50]) + expected = Series(pd.array([0.25, 0.40, 0.50])) tm.assert_series_equal(result, expected) From f05d5fb483222fb8b262f909a6b9711b3b3d401d Mon Sep 17 00:00:00 2001 From: Laurent Mutricy Date: Sat, 29 Jun 2024 00:01:11 +0200 Subject: [PATCH 04/10] update whatsnew --- doc/source/whatsnew/v3.0.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index d9d2330f8f11b..b655032305be9 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -615,6 +615,7 @@ Other ^^^^^ - Bug in :class:`DataFrame` when passing a ``dict`` with a NA scalar and ``columns`` that would always return ``np.nan`` (:issue:`57205`) - Bug in :func:`eval` on :class:`ExtensionArray` on including division ``/`` failed with a ``TypeError``. (:issue:`58748`) +- Bug in :func:`eval` on :class:`complex` including division ``/`` discards imaginary part. (:issue:`21374`) - Bug in :func:`eval` where the names of the :class:`Series` were not preserved when using ``engine="numexpr"``. (:issue:`10239`) - Bug in :func:`unique` on :class:`Index` not always returning :class:`Index` (:issue:`57043`) - Bug in :meth:`DataFrame.apply` where passing ``engine="numba"`` ignored ``args`` passed to the applied function (:issue:`58712`) From c81cc6bf2d83437b6d84da17b0c4d00612628d2a Mon Sep 17 00:00:00 2001 From: Laurent Mutricy Date: Sat, 29 Jun 2024 07:59:46 +0200 Subject: [PATCH 05/10] solve mypy issue --- pandas/conftest.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/conftest.py b/pandas/conftest.py index 8ca43e94e2bff..ac1b3c0c13f67 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -84,6 +84,9 @@ Hashable, Iterator, ) +from typing import cast + +from pandas._typing import Dtype try: import pyarrow as pa @@ -1448,7 +1451,7 @@ def complex_dtype(request): return request.param -@pytest.fixture(params=tm.COMPLEX_DTYPES + tm.FLOAT_NUMPY_DTYPES) +@pytest.fixture(params=tm.COMPLEX_DTYPES + cast(list[Dtype], tm.FLOAT_NUMPY_DTYPES)) def complex_or_float_dtype(request): """ Parameterized fixture for complex and numpy float dtypes. From 0451e7a5bf71769c7942c503c3894782c21024f8 Mon Sep 17 00:00:00 2001 From: Laurent Mutricy Date: Sat, 29 Jun 2024 08:07:28 +0200 Subject: [PATCH 06/10] fixing pytests --- pandas/tests/frame/test_query_eval.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py index 4f5c0ef44a42d..8a5e975321404 100644 --- a/pandas/tests/frame/test_query_eval.py +++ b/pandas/tests/frame/test_query_eval.py @@ -202,8 +202,13 @@ def test_eval_simple(self, engine, parser): expected = df["a"] tm.assert_series_equal(expected, res) - def test_extension_array_eval(self, engine, parser): + def test_extension_array_eval(self, engine, parser, request): # GH#58748 + if engine == "numexpr": + mark = pytest.mark.xfail( + reason="numexpr does not support extension array dtypes" + ) + request.node.add_marker(mark) df = DataFrame({"a": pd.array([1, 2, 3]), "b": pd.array([4, 5, 6])}) result = df.eval("a / b", engine=engine, parser=parser) expected = Series(pd.array([0.25, 0.40, 0.50])) From ee5e6c4eb4835f9ca798ea4128b6bc05b388df88 Mon Sep 17 00:00:00 2001 From: Laurent Mutricy Date: Sun, 30 Jun 2024 10:24:18 +0200 Subject: [PATCH 07/10] better than cast --- pandas/_testing/__init__.py | 1 + pandas/conftest.py | 5 +---- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py index fb8ca8aad3428..1cd91ee5b120c 100644 --- a/pandas/_testing/__init__.py +++ b/pandas/_testing/__init__.py @@ -107,6 +107,7 @@ COMPLEX_DTYPES: list[Dtype] = [complex, "complex64", "complex128"] STRING_DTYPES: list[Dtype] = [str, "str", "U"] +COMPLEX_FLOAT_DTYPES: list[Dtype] = [*COMPLEX_DTYPES, *FLOAT_NUMPY_DTYPES] DATETIME64_DTYPES: list[Dtype] = ["datetime64[ns]", "M8[ns]"] TIMEDELTA64_DTYPES: list[Dtype] = ["timedelta64[ns]", "m8[ns]"] diff --git a/pandas/conftest.py b/pandas/conftest.py index ac1b3c0c13f67..70e729dfb98a4 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -84,9 +84,6 @@ Hashable, Iterator, ) -from typing import cast - -from pandas._typing import Dtype try: import pyarrow as pa @@ -1451,7 +1448,7 @@ def complex_dtype(request): return request.param -@pytest.fixture(params=tm.COMPLEX_DTYPES + cast(list[Dtype], tm.FLOAT_NUMPY_DTYPES)) +@pytest.fixture(params=tm.COMPLEX_FLOAT_DTYPES) def complex_or_float_dtype(request): """ Parameterized fixture for complex and numpy float dtypes. From 7bf740ec27d4fd08dbba0fb441fc34c22e32b64e Mon Sep 17 00:00:00 2001 From: Laurent Mutricy Date: Sun, 30 Jun 2024 10:36:28 +0200 Subject: [PATCH 08/10] adding specific test --- pandas/tests/frame/test_query_eval.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py index 8a5e975321404..a1cca3b4174f6 100644 --- a/pandas/tests/frame/test_query_eval.py +++ b/pandas/tests/frame/test_query_eval.py @@ -214,6 +214,13 @@ def test_extension_array_eval(self, engine, parser, request): expected = Series(pd.array([0.25, 0.40, 0.50])) tm.assert_series_equal(result, expected) + def test_complex_eval(self, engine, parser): + # GH#21374 + df = DataFrame({"a": [1 + 2j], "b": [1 + 1j]}) + result = df.eval("a/b", engine=engine, parser=parser) + expected = Series([1.5 + 0.5j]) + tm.assert_series_equal(result, expected) + class TestDataFrameQueryWithMultiIndex: def test_query_with_named_multiindex(self, parser, engine): From 33fa0c6e5d80bcb7b2e952a8ea3f75ba587902fe Mon Sep 17 00:00:00 2001 From: mutricyl <118692416+mutricyl@users.noreply.github.com> Date: Mon, 1 Jul 2024 20:22:49 +0200 Subject: [PATCH 09/10] Update pandas/tests/frame/test_query_eval.py Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> --- pandas/tests/frame/test_query_eval.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py index a1cca3b4174f6..c9ea5f379f1e9 100644 --- a/pandas/tests/frame/test_query_eval.py +++ b/pandas/tests/frame/test_query_eval.py @@ -208,7 +208,7 @@ def test_extension_array_eval(self, engine, parser, request): mark = pytest.mark.xfail( reason="numexpr does not support extension array dtypes" ) - request.node.add_marker(mark) + request.applymarker(mark) df = DataFrame({"a": pd.array([1, 2, 3]), "b": pd.array([4, 5, 6])}) result = df.eval("a / b", engine=engine, parser=parser) expected = Series(pd.array([0.25, 0.40, 0.50])) From 3cb65eb7c7e9fd6a8fafc7a4ba5725ff2aa54680 Mon Sep 17 00:00:00 2001 From: mutricyl <118692416+mutricyl@users.noreply.github.com> Date: Mon, 1 Jul 2024 20:22:59 +0200 Subject: [PATCH 10/10] Update pandas/tests/computation/test_eval.py Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> --- pandas/tests/computation/test_eval.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py index 3206cf339adb4..1844b47847e95 100644 --- a/pandas/tests/computation/test_eval.py +++ b/pandas/tests/computation/test_eval.py @@ -773,7 +773,7 @@ def test_binop_typecasting( "to complex 128 " "https://github.com/pydata/numexpr/issues/492" ) - request.node.add_marker(mark) + request.applymarker(mark) assert df.values.dtype == dtype assert res.values.dtype == dtype tm.assert_frame_equal(res, eval(s), check_exact=False)