Skip to content

BUG: Decimal(NaN) incorrectly allowed in ArrowEA constructor with tim… #61773

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -706,6 +706,8 @@ Datetimelike
- Bug in :meth:`to_datetime` reporting an incorrect index in case of any failure scenario. (:issue:`58298`)
- Bug in :meth:`to_datetime` with ``format="ISO8601"`` and ``utc=True`` where naive timestamps incorrectly inherited timezone offset from previous timestamps in a series. (:issue:`61389`)
- Bug in :meth:`to_datetime` wrongly converting when ``arg`` is a ``np.datetime64`` object with a unit of ``ps``. (:issue:`60341`)
- Bug in constructing arrays with :class:`ArrowDtype` with ``timestamp`` type incorrectly allowing ``Decimal("NaN")`` (:issue:`61773`)
- Bug in constructing arrays with a timezone-aware :class:`ArrowDtype` from timezone-naive datetime objects incorrectly treating those as UTC times instead of wall times like :class:`DatetimeTZDtype` (:issue:`61775`)
- Bug in setting scalar values with mismatched resolution into arrays with non-nanosecond ``datetime64``, ``timedelta64`` or :class:`DatetimeTZDtype` incorrectly truncating those scalars (:issue:`56410`)

Timedelta
Expand Down
28 changes: 28 additions & 0 deletions pandas/core/arrays/arrow/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@
from pandas.core.arrays.masked import BaseMaskedArray
from pandas.core.arrays.string_ import StringDtype
import pandas.core.common as com
from pandas.core.construction import extract_array
from pandas.core.indexers import (
check_array_indexer,
unpack_tuple_and_ellipses,
Expand Down Expand Up @@ -500,6 +501,33 @@ def _box_pa_array(
value = to_timedelta(value, unit=pa_type.unit).as_unit(pa_type.unit)
value = value.to_numpy()

if pa_type is not None and pa.types.is_timestamp(pa_type):
# Use DatetimeArray to exclude Decimal(NaN) (GH#61774) and
# ensure constructor treats tznaive the same as non-pyarrow
# dtypes (GH#61775)
from pandas.core.arrays.datetimes import (
DatetimeArray,
tz_to_dtype,
)

pass_dtype = tz_to_dtype(tz=pa_type.tz, unit=pa_type.unit)
value = extract_array(value, extract_numpy=True)
if isinstance(value, DatetimeArray):
dta = value
else:
dta = DatetimeArray._from_sequence(
value, copy=copy, dtype=pass_dtype
)
mask = dta.isna()
value_i8 = dta.view("i8")
if not np.asarray(value_i8).flags["WRITEABLE"]:
# e.g. test_setitem_frame_2d_values
value_i8 = value_i8.copy()
dta = DatetimeArray._from_sequence(value_i8, dtype=dta.dtype)
value_i8[mask] = 0 # GH#61776 avoid __sub__ overflow
pa_array = pa.array(dta._ndarray, type=pa_type, mask=mask)
return pa_array

try:
pa_array = pa.array(value, type=pa_type, from_pandas=True)
except (pa.ArrowInvalid, pa.ArrowTypeError):
Expand Down
34 changes: 31 additions & 3 deletions pandas/tests/extension/test_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -2682,14 +2682,15 @@ def test_dt_tz_localize_unsupported_tz_options():
ser.dt.tz_localize("UTC", nonexistent="NaT")


@pytest.mark.xfail(reason="Converts to UTC before localizing GH#61780")
def test_dt_tz_localize_none():
ser = pd.Series(
[datetime(year=2023, month=1, day=2, hour=3), None],
dtype=ArrowDtype(pa.timestamp("ns", tz="US/Pacific")),
)
result = ser.dt.tz_localize(None)
expected = pd.Series(
[datetime(year=2023, month=1, day=2, hour=3), None],
[ser[0].tz_localize(None), None],
dtype=ArrowDtype(pa.timestamp("ns")),
)
tm.assert_series_equal(result, expected)
Expand Down Expand Up @@ -2749,7 +2750,7 @@ def test_dt_tz_convert_none():
)
result = ser.dt.tz_convert(None)
expected = pd.Series(
[datetime(year=2023, month=1, day=2, hour=3), None],
[ser[0].tz_convert(None), None],
dtype=ArrowDtype(pa.timestamp("ns")),
)
tm.assert_series_equal(result, expected)
Expand All @@ -2763,7 +2764,7 @@ def test_dt_tz_convert(unit):
)
result = ser.dt.tz_convert("US/Eastern")
expected = pd.Series(
[datetime(year=2023, month=1, day=2, hour=3), None],
[ser[0].tz_convert("US/Eastern"), None],
dtype=ArrowDtype(pa.timestamp(unit, "US/Eastern")),
)
tm.assert_series_equal(result, expected)
Expand Down Expand Up @@ -3544,3 +3545,30 @@ def test_arrow_json_type():
dtype = ArrowDtype(pa.json_(pa.string()))
result = dtype.type
assert result == str


def test_timestamp_dtype_disallows_decimal():
    """
    Decimal("NaN") must be rejected by pyarrow timestamp dtypes (GH#61773).

    The pyarrow-backed constructor is expected to raise the same TypeError
    that pd.to_datetime raises for the same input.
    """
    expected_msg = "<class 'decimal.Decimal'> is not convertible to datetime"
    values = [pd.Timestamp("2016-01-02 03:04:05"), Decimal("NaN")]

    # Baseline: confirm the non-pyarrow path raises for this input
    with pytest.raises(TypeError, match=expected_msg):
        pd.to_datetime(values)

    # Constructing with a pyarrow timestamp dtype must raise the same way
    with pytest.raises(TypeError, match=expected_msg):
        arrow_dtype = ArrowDtype(pa.timestamp("us"))
        pd.array(values, dtype=arrow_dtype)


def test_timestamp_dtype_matches_to_datetime():
    """
    A tz-aware pyarrow timestamp dtype and the equivalent datetime64 dtype
    must build equal Series from the same tz-naive Timestamp (GH#61775).
    """
    naive_ts = pd.Timestamp("2025-07-03 18:10")
    numpy_dtype = "datetime64[ns, US/Eastern]"
    arrow_dtype = "timestamp[ns, US/Eastern][pyarrow]"

    result = pd.Series([naive_ts], dtype=arrow_dtype)

    # Build the reference through the non-pyarrow dtype, then convert it to
    # the pyarrow backend for the comparison
    numpy_backed = pd.Series([naive_ts], dtype=numpy_dtype)
    expected = numpy_backed.convert_dtypes(dtype_backend="pyarrow")

    tm.assert_series_equal(result, expected)
7 changes: 7 additions & 0 deletions pandas/tests/io/test_sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
time,
timedelta,
)
from decimal import Decimal
from io import StringIO
from pathlib import Path
import sqlite3
Expand Down Expand Up @@ -1038,6 +1039,12 @@ def test_dataframe_to_sql_arrow_dtypes(conn, request):
def test_dataframe_to_sql_arrow_dtypes_missing(conn, request, nulls_fixture):
# GH 52046
pytest.importorskip("pyarrow")
if isinstance(nulls_fixture, Decimal):
pytest.skip(
# GH#61773
reason="Decimal('NaN') not supported in constructor for timestamp dtype"
)

df = DataFrame(
{
"datetime": pd.array(
Expand Down
Loading