diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 4154942f92907..10fb9503ffb3d 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -706,6 +706,8 @@ Datetimelike - Bug in :meth:`to_datetime` reports incorrect index in case of any failure scenario. (:issue:`58298`) - Bug in :meth:`to_datetime` with ``format="ISO8601"`` and ``utc=True`` where naive timestamps incorrectly inherited timezone offset from previous timestamps in a series. (:issue:`61389`) - Bug in :meth:`to_datetime` wrongly converts when ``arg`` is a ``np.datetime64`` object with unit of ``ps``. (:issue:`60341`) +- Bug in constructing arrays with :class:`ArrowDtype` with ``timestamp`` type incorrectly allowing ``Decimal("NaN")`` (:issue:`61773`) +- Bug in constructing arrays with a timezone-aware :class:`ArrowDtype` from timezone-naive datetime objects incorrectly treating those as UTC times instead of wall times like :class:`DatetimeTZDtype` (:issue:`61775`) - Bug in setting scalar values with mismatched resolution into arrays with non-nanosecond ``datetime64``, ``timedelta64`` or :class:`DatetimeTZDtype` incorrectly truncating those scalars (:issue:`56410`) Timedelta diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index b4e60819b033f..3dc03d9cbf3a2 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -63,6 +63,7 @@ from pandas.core.arrays.masked import BaseMaskedArray from pandas.core.arrays.string_ import StringDtype import pandas.core.common as com +from pandas.core.construction import extract_array from pandas.core.indexers import ( check_array_indexer, unpack_tuple_and_ellipses, @@ -500,6 +501,33 @@ def _box_pa_array( value = to_timedelta(value, unit=pa_type.unit).as_unit(pa_type.unit) value = value.to_numpy() + if pa_type is not None and pa.types.is_timestamp(pa_type): + # Use DatetimeArray to exclude Decimal(NaN) (GH#61774) and + # ensure constructor treats tznaive the same as non-pyarrow + # dtypes (GH#61775) + from pandas.core.arrays.datetimes import ( + DatetimeArray, + tz_to_dtype, + ) + + pass_dtype = tz_to_dtype(tz=pa_type.tz, unit=pa_type.unit) + value = extract_array(value, extract_numpy=True) + if isinstance(value, DatetimeArray): + dta = value + else: + dta = DatetimeArray._from_sequence( + value, copy=copy, dtype=pass_dtype + ) + dta_mask = dta.isna() + value_i8 = cast("npt.NDArray", dta.view("i8")) + if not value_i8.flags["WRITEABLE"]: + # e.g. test_setitem_frame_2d_values + value_i8 = value_i8.copy() + dta = DatetimeArray._from_sequence(value_i8, dtype=dta.dtype) + value_i8[dta_mask] = 0 # GH#61776 avoid __sub__ overflow + pa_array = pa.array(dta._ndarray, type=pa_type, mask=dta_mask) + return pa_array + try: pa_array = pa.array(value, type=pa_type, from_pandas=True) except (pa.ArrowInvalid, pa.ArrowTypeError): diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 8db837b176fe9..7e7cd8fb13456 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -2686,6 +2686,7 @@ def test_dt_tz_localize_unsupported_tz_options(): ser.dt.tz_localize("UTC", nonexistent="NaT") +@pytest.mark.xfail(reason="Converts to UTC before localizing GH#61780") def test_dt_tz_localize_none(): ser = pd.Series( [datetime(year=2023, month=1, day=2, hour=3), None], @@ -2693,7 +2694,7 @@ def test_dt_tz_localize_none(): ) result = ser.dt.tz_localize(None) expected = pd.Series( - [datetime(year=2023, month=1, day=2, hour=3), None], + [ser[0].tz_localize(None), None], dtype=ArrowDtype(pa.timestamp("ns")), ) tm.assert_series_equal(result, expected) @@ -2753,7 +2754,7 @@ def test_dt_tz_convert_none(): ) result = ser.dt.tz_convert(None) expected = pd.Series( - [datetime(year=2023, month=1, day=2, hour=3), None], + [ser[0].tz_convert(None), None], dtype=ArrowDtype(pa.timestamp("ns")), ) tm.assert_series_equal(result, expected) @@ -2767,7 +2768,7 @@ def test_dt_tz_convert(unit): ) result = ser.dt.tz_convert("US/Eastern") expected = pd.Series( - [datetime(year=2023, month=1, day=2, hour=3), None], + [ser[0].tz_convert("US/Eastern"), None], dtype=ArrowDtype(pa.timestamp(unit, "US/Eastern")), ) tm.assert_series_equal(result, expected) @@ -3548,3 +3549,30 @@ def test_arrow_json_type(): dtype = ArrowDtype(pa.json_(pa.string())) result = dtype.type assert result == str + + +def test_timestamp_dtype_disallows_decimal(): + # GH#61773 constructing with pyarrow timestamp dtype should disallow + # Decimal NaN, just like pd.to_datetime + vals = [pd.Timestamp("2016-01-02 03:04:05"), Decimal("NaN")] + + msg = " is not convertible to datetime" + with pytest.raises(TypeError, match=msg): + # Check that the non-pyarrow version raises as expected + pd.to_datetime(vals) + + with pytest.raises(TypeError, match=msg): + pd.array(vals, dtype=ArrowDtype(pa.timestamp("us"))) + + +def test_timestamp_dtype_matches_to_datetime(): + # GH#61775 + dtype1 = "datetime64[ns, US/Eastern]" + dtype2 = "timestamp[ns, US/Eastern][pyarrow]" + + ts = pd.Timestamp("2025-07-03 18:10") + + result = pd.Series([ts], dtype=dtype2) + expected = pd.Series([ts], dtype=dtype1).convert_dtypes(dtype_backend="pyarrow") + + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 4a6a5635eb68c..6f4c1602a5e64 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -8,6 +8,7 @@ time, timedelta, ) +from decimal import Decimal from io import StringIO from pathlib import Path import sqlite3 @@ -1038,6 +1039,12 @@ def test_dataframe_to_sql_arrow_dtypes(conn, request): def test_dataframe_to_sql_arrow_dtypes_missing(conn, request, nulls_fixture): # GH 52046 pytest.importorskip("pyarrow") + if isinstance(nulls_fixture, Decimal): + pytest.skip( + # GH#61773 + reason="Decimal('NaN') not supported in constructor for timestamp dtype" + ) + df = DataFrame( { "datetime": pd.array(