Skip to content

BUG: Fix lost precision with common type of uint64/int64 #61679

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 12 commits into
base: main
Choose a base branch
from
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -404,6 +404,7 @@ Other API changes
- Index set operations (like union or intersection) will now ignore the dtype of
an empty ``RangeIndex`` or empty ``Index`` with object dtype when determining
the dtype of the resulting Index (:issue:`60797`)
- ``np_find_common_type`` will now return ``object`` for mixed ``int64`` and ``uint64`` dtypes to avoid precision loss (:issue:`61676`, :issue:`61688`)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi,

I'm just skimming issues so have not looked in detail.

By glancing through I see we are changing tested behavior. This sort of suggests intentional behavior and not a bug per se. If it is a longstanding behavior we may want to deprecate first as a breaking API change?

In pandas we normally want to avoid object dtype, and I don't see any discussion establishing that this is the agreed way forward.


.. ---------------------------------------------------------------------------
.. _whatsnew_300.deprecations:
Expand Down
12 changes: 0 additions & 12 deletions pandas/core/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,15 +47,13 @@
is_bool_dtype,
is_complex_dtype,
is_dict_like,
is_dtype_equal,
is_extension_array_dtype,
is_float,
is_float_dtype,
is_integer,
is_integer_dtype,
is_list_like,
is_object_dtype,
is_signed_integer_dtype,
needs_i8_conversion,
)
from pandas.core.dtypes.concat import concat_compat
Expand Down Expand Up @@ -508,16 +506,6 @@ def isin(comps: ListLike, values: ListLike) -> npt.NDArray[np.bool_]:
orig_values = list(values)
values = _ensure_arraylike(orig_values, func_name="isin-targets")

if (
len(values) > 0
and values.dtype.kind in "iufcb"
and not is_signed_integer_dtype(comps)
and not is_dtype_equal(values, comps)
):
# GH#46485 Use object to avoid upcast to float64 later
# TODO: Share with _find_common_type_compat
values = construct_1d_object_array_from_listlike(orig_values)

elif isinstance(values, ABCMultiIndex):
# Avoid raising in extract_array
values = np.array(values)
Expand Down
7 changes: 7 additions & 0 deletions pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -1418,6 +1418,13 @@ def np_find_common_type(*dtypes: np.dtype) -> np.dtype:
# so fall back to object (find_common_dtype did unless there
# was only one dtype)
common_dtype = np.dtype("O")
elif (
# Some precision is lost with float64 when handling uint64/int64
# Use object instead for the common type
all(np.dtype(x).kind in "iu" and np.dtype(x).itemsize == 8 for x in dtypes)
and common_dtype == np.float64
):
common_dtype = np.dtype("O")

except TypeError:
common_dtype = np.dtype("O")
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/arrays/integer/test_concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
(["Int8", "Int16"], "Int16"),
(["UInt8", "Int8"], "Int16"),
(["Int32", "UInt32"], "Int64"),
(["Int64", "UInt64"], "Float64"),
(["Int64", "UInt64"], "object"),
(["Int64", "boolean"], "object"),
(["UInt8", "boolean"], "object"),
],
Expand Down Expand Up @@ -48,7 +48,7 @@ def test_concat_series(to_concat_dtypes, result_dtype):
(["Int8", "int16"], "Int16"),
(["UInt8", "int8"], "Int16"),
(["Int32", "uint32"], "Int64"),
(["Int64", "uint64"], "Float64"),
(["Int64", "uint64"], "object"),
(["Int64", "bool"], "object"),
(["UInt8", "bool"], "object"),
],
Expand Down
15 changes: 11 additions & 4 deletions pandas/tests/dtypes/cast/test_find_common_type.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
((np.float16, np.float32), np.float32),
((np.float16, np.int16), np.float32),
((np.float32, np.int16), np.float32),
((np.uint64, np.int64), np.float64),
((np.uint64, np.int64), object),
((np.int16, np.float64), np.float64),
((np.float16, np.int64), np.float64),
# Into others.
Expand Down Expand Up @@ -155,9 +155,16 @@ def test_interval_dtype(left, right):
elif left.subtype.kind in ["i", "u", "f"]:
# i.e. numeric
if right.subtype.kind in ["i", "u", "f"]:
# both numeric -> common numeric subtype
expected = IntervalDtype(np.float64, "right")
assert result == expected
if (
left.subtype.kind in ["i", "u"]
and right.subtype.kind in ["i", "u"]
and left.subtype.kind != right.subtype.kind
):
assert result == object
else:
# both numeric -> common numeric subtype
expected = IntervalDtype(np.float64, "right")
assert result == expected
else:
assert result == object

Expand Down
2 changes: 2 additions & 0 deletions pandas/tests/indexes/interval/test_setops.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ def test_union_empty_result(self, closed, sort):
tm.assert_index_equal(result, expected)

other = empty_index(dtype="uint64", closed=closed)
expected = Index([], dtype="object")
result = index.union(other, sort=sort)
tm.assert_index_equal(result, expected)

Expand Down Expand Up @@ -117,6 +118,7 @@ def test_intersection_empty_result(self, closed, sort):
tm.assert_index_equal(result, expected)

other = monotonic_index(300, 314, dtype="uint64", closed=closed)
expected = Index([], dtype="object")
result = index.intersection(other, sort=sort)
tm.assert_index_equal(result, expected)

Expand Down
7 changes: 7 additions & 0 deletions pandas/tests/test_algos.py
Original file line number Diff line number Diff line change
Expand Up @@ -1197,6 +1197,13 @@ def test_isin_unsigned_dtype(self):
expected = Series(False)
tm.assert_series_equal(result, expected)

def test_isin_unsigned_dtype_other_side(self):
# GH#46485
# An int64 Series is checked against a single uint64 lookup value.
# The two integers differ by 86 but both round to the same float64
# (spacing between float64 values at this magnitude is 256), so a
# comparison carried out in float64 would wrongly report a match.
ser = Series([1378774140726870442], dtype=np.int64)
result = ser.isin([np.uint64(1378774140726870528)])
# The values are not equal as exact integers, so no match is expected.
expected = Series(False)
tm.assert_series_equal(result, expected)


class TestValueCounts:
def test_value_counts(self):
Expand Down
Loading