Skip to content

Commit 75daea4

Browse files
DEPR: be stricter in assert_almost_equal (#52081)
* DEPR: be stricter in assert_almost_equal * 32bit builds * Fix transform test * ignore warning I can't reproduce locally * pylint fixup * Fix ArrayManager and CoW builds * fix tests * Whatsnew --------- Co-authored-by: Matthew Roeschke <[email protected]>
1 parent 2a270d8 commit 75daea4

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

42 files changed

+345
-106
lines changed

doc/source/whatsnew/v2.1.0.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -265,8 +265,10 @@ Deprecations
265265
- Deprecated logical operation between two non boolean :class:`Series` with different indexes always coercing the result to bool dtype. In a future version, this will maintain the return type of the inputs. (:issue:`52500`, :issue:`52538`)
266266
- Deprecated allowing ``downcast`` keyword other than ``None``, ``False``, "infer", or a dict with these as values in :meth:`Series.fillna`, :meth:`DataFrame.fillna` (:issue:`40988`)
267267
- Deprecated allowing arbitrary ``fill_value`` in :class:`SparseDtype`, in a future version the ``fill_value`` will need to be compatible with the ``dtype.subtype``, either a scalar that can be held by that subtype or ``NaN`` for integer or bool subtypes (:issue:`23124`)
268+
- Deprecated behavior of :func:`assert_series_equal` and :func:`assert_frame_equal` considering mismatched NA-like values (e.g. ``NaN`` vs ``None``) as equivalent (:issue:`52081`)
268269
- Deprecated constructing :class:`SparseArray` from scalar data, pass a sequence instead (:issue:`53039`)
269270
- Deprecated positional indexing on :class:`Series` with :meth:`Series.__getitem__` and :meth:`Series.__setitem__`, in a future version ``ser[item]`` will *always* interpret ``item`` as a label, not a position (:issue:`50617`)
271+
-
270272

271273
.. ---------------------------------------------------------------------------
272274
.. _whatsnew_210.performance:

pandas/_libs/testing.pyx

Lines changed: 23 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,25 @@
11
import cmath
22
import math
3+
import warnings
34

45
import numpy as np
56

67
from numpy cimport import_array
78

89
import_array()
910

10-
from pandas._libs.missing cimport checknull
11+
from pandas._libs.missing cimport (
12+
checknull,
13+
is_matching_na,
14+
)
1115
from pandas._libs.util cimport (
1216
is_array,
1317
is_complex_object,
1418
is_real_number_object,
1519
)
1620

21+
from pandas.util._exceptions import find_stack_level
22+
1723
from pandas.core.dtypes.missing import array_equivalent
1824

1925

@@ -176,13 +182,23 @@ cpdef assert_almost_equal(a, b,
176182
# classes can't be the same, to raise error
177183
assert_class_equal(a, b, obj=obj)
178184

179-
if checknull(a) and checknull(b):
180-
# TODO: Should require same-dtype NA?
185+
if checknull(a):
181186
# nan / None comparison
182-
return True
183-
184-
if (checknull(a) and not checknull(b)) or (not checknull(a) and checknull(b)):
185-
# boolean value of pd.NA is ambiguous
187+
if is_matching_na(a, b, nan_matches_none=False):
188+
return True
189+
elif checknull(b):
190+
# GH#18463
191+
warnings.warn(
192+
f"Mismatched null-like values {a} and {b} found. In a future "
193+
"version, pandas equality-testing functions "
194+
"(e.g. assert_frame_equal) will consider these not-matching "
195+
"and raise.",
196+
FutureWarning,
197+
stacklevel=find_stack_level(),
198+
)
199+
return True
200+
raise AssertionError(f"{a} != {b}")
201+
elif checknull(b):
186202
raise AssertionError(f"{a} != {b}")
187203

188204
if a == b:

pandas/core/dtypes/missing.py

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
TYPE_CHECKING,
1010
overload,
1111
)
12+
import warnings
1213

1314
import numpy as np
1415

@@ -573,17 +574,20 @@ def _array_equivalent_object(left: np.ndarray, right: np.ndarray, strict_nan: bo
573574
if not isinstance(right_value, float) or not np.isnan(right_value):
574575
return False
575576
else:
576-
try:
577-
if np.any(np.asarray(left_value != right_value)):
577+
with warnings.catch_warnings():
578+
# suppress numpy's "elementwise comparison failed"
579+
warnings.simplefilter("ignore", DeprecationWarning)
580+
try:
581+
if np.any(np.asarray(left_value != right_value)):
582+
return False
583+
except TypeError as err:
584+
if "boolean value of NA is ambiguous" in str(err):
585+
return False
586+
raise
587+
except ValueError:
588+
# numpy can raise a ValueError if left and right cannot be
589+
# compared (e.g. nested arrays)
578590
return False
579-
except TypeError as err:
580-
if "boolean value of NA is ambiguous" in str(err):
581-
return False
582-
raise
583-
except ValueError:
584-
# numpy can raise a ValueError if left and right cannot be
585-
# compared (e.g. nested arrays)
586-
return False
587591
return True
588592

589593

pandas/tests/arithmetic/test_timedelta64.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2037,6 +2037,10 @@ def test_td64arr_div_numeric_array(
20372037
if box_with_array is DataFrame:
20382038
expected = [tdser.iloc[0, n] / vector[n] for n in range(len(vector))]
20392039
expected = tm.box_expected(expected, xbox).astype(object)
2040+
# We specifically expect timedelta64("NaT") here, not pd.NA
2041+
expected[2] = expected[2].fillna(
2042+
np.timedelta64("NaT", "ns"), downcast=False
2043+
)
20402044
else:
20412045
expected = [tdser[n] / vector[n] for n in range(len(tdser))]
20422046
expected = [
@@ -2113,9 +2117,12 @@ def test_td64arr_all_nat_div_object_dtype_numeric(self, box_with_array):
21132117
left = tm.box_expected(tdi, box_with_array)
21142118
right = np.array([2, 2.0], dtype=object)
21152119

2116-
expected = Index([np.timedelta64("NaT", "ns")] * 2, dtype=object)
2120+
tdnat = np.timedelta64("NaT", "ns")
2121+
expected = Index([tdnat] * 2, dtype=object)
21172122
if box_with_array is not Index:
21182123
expected = tm.box_expected(expected, box_with_array).astype(object)
2124+
if box_with_array in [Series, DataFrame]:
2125+
expected = expected.fillna(tdnat, downcast=False) # GH#18463
21192126

21202127
result = left / right
21212128
tm.assert_equal(result, expected)

pandas/tests/arrays/integer/test_arithmetic.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,10 @@ def test_error_invalid_values(data, all_arithmetic_operators):
204204
]: # (data[~data.isna()] >= 0).all():
205205
res = ops(str_ser)
206206
expected = pd.Series(["foo" * x for x in data], index=s.index)
207+
expected = expected.fillna(np.nan)
208+
# TODO: doing this fillna to keep tests passing as we make
209+
# assert_almost_equal stricter, but the expected with pd.NA seems
210+
# more-correct than np.nan here.
207211
tm.assert_series_equal(res, expected)
208212
else:
209213
with pytest.raises(TypeError, match=msg):

pandas/tests/arrays/integer/test_construction.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ def test_conversions(data_missing):
5151
# astype to object series
5252
df = pd.DataFrame({"A": data_missing})
5353
result = df["A"].astype("object")
54-
expected = pd.Series(np.array([np.nan, 1], dtype=object), name="A")
54+
expected = pd.Series(np.array([pd.NA, 1], dtype=object), name="A")
5555
tm.assert_series_equal(result, expected)
5656

5757
# convert to object ndarray

pandas/tests/extension/test_boolean.py

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,10 @@
1616
import numpy as np
1717
import pytest
1818

19-
from pandas.core.dtypes.common import is_bool_dtype
19+
from pandas.compat import (
20+
IS64,
21+
is_platform_windows,
22+
)
2023

2124
import pandas as pd
2225
import pandas._testing as tm
@@ -382,11 +385,18 @@ class TestUnaryOps(base.BaseUnaryOpsTests):
382385

383386
class TestAccumulation(base.BaseAccumulateTests):
384387
def check_accumulate(self, s, op_name, skipna):
388+
length = 64
389+
if not IS64 or is_platform_windows():
390+
if not s.dtype.itemsize == 8:
391+
length = 32
392+
385393
result = getattr(s, op_name)(skipna=skipna)
386394
expected = getattr(pd.Series(s.astype("float64")), op_name)(skipna=skipna)
387-
tm.assert_series_equal(result, expected, check_dtype=False)
388-
if op_name in ("cummin", "cummax"):
389-
assert is_bool_dtype(result)
395+
if op_name not in ("cummin", "cummax"):
396+
expected = expected.astype(f"Int{length}")
397+
else:
398+
expected = expected.astype("boolean")
399+
tm.assert_series_equal(result, expected)
390400

391401
@pytest.mark.parametrize("skipna", [True, False])
392402
def test_accumulate_series_raises(self, data, all_numeric_accumulations, skipna):

pandas/tests/frame/methods/test_compare.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,10 @@ def test_compare_ea_and_np_dtype(val1, val2):
265265
("b", "other"): np.nan,
266266
}
267267
)
268+
if val1 is pd.NA and val2 is pd.NA:
269+
# GH#18463 TODO: is this really the desired behavior?
270+
expected.loc[1, ("a", "self")] = np.nan
271+
268272
if val1 is pd.NA and is_numpy_dev:
269273
# can't compare with numpy array if it contains pd.NA
270274
with pytest.raises(TypeError, match="boolean value of NA is ambiguous"):

pandas/tests/frame/methods/test_quantile.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -734,6 +734,9 @@ def test_quantile_empty_no_rows_dt64(self, interp_method):
734734
0.5, numeric_only=False, interpolation=interpolation, method=method
735735
)
736736
exp = exp.astype(object)
737+
if interpolation == "nearest":
738+
# GH#18463 TODO: would we prefer NaTs here?
739+
exp = exp.fillna(np.nan, downcast=False)
737740
tm.assert_series_equal(res, exp)
738741

739742
# both dt64tz

pandas/tests/frame/methods/test_reindex.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -112,9 +112,13 @@ def test_reindex_timestamp_with_fold(self, timezone, year, month, day, hour):
112112
.set_index("index")
113113
.reindex(["1", "2"])
114114
)
115+
exp = DataFrame({"index": ["1", "2"], "vals": [np.nan, np.nan]}).set_index(
116+
"index"
117+
)
118+
exp = exp.astype(object)
115119
tm.assert_frame_equal(
116120
df,
117-
DataFrame({"index": ["1", "2"], "vals": [None, None]}).set_index("index"),
121+
exp,
118122
)
119123

120124

@@ -1191,7 +1195,7 @@ def test_reindex_empty_frame(self, kwargs):
11911195
idx = date_range(start="2020", freq="30s", periods=3)
11921196
df = DataFrame([], index=Index([], name="time"), columns=["a"])
11931197
result = df.reindex(idx, **kwargs)
1194-
expected = DataFrame({"a": [pd.NA] * 3}, index=idx)
1198+
expected = DataFrame({"a": [np.nan] * 3}, index=idx, dtype=object)
11951199
tm.assert_frame_equal(result, expected)
11961200

11971201
@pytest.mark.parametrize(

0 commit comments

Comments
 (0)