diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index a100af56faab8..82e9812094af2 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -1471,7 +1471,9 @@ def _maybe_upcast( elif arr.dtype == np.object_: if use_dtype_backend: - arr = StringDtype().construct_array_type()._from_sequence(arr) + dtype = StringDtype() + cls = dtype.construct_array_type() + arr = cls._from_sequence(arr, dtype=dtype) if use_dtype_backend and dtype_backend == "pyarrow": import pyarrow as pa diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index d8b074fe61322..9f0a039126d1d 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -40,6 +40,7 @@ is_integer, is_list_like, is_scalar, + pandas_dtype, ) from pandas.core.dtypes.dtypes import DatetimeTZDtype from pandas.core.dtypes.missing import isna @@ -273,6 +274,10 @@ def _from_sequence(cls, scalars, *, dtype: Dtype | None = None, copy: bool = Fal """ Construct a new ExtensionArray from a sequence of scalars. """ + if dtype is not None and isinstance(dtype, str): + # FIXME: in tests.extension.test_arrow we pass pyarrow _type_ objects + # which raise when passed to pandas_dtype + dtype = pandas_dtype(dtype) pa_type = to_pyarrow_type(dtype) pa_array = cls._box_pa_array(scalars, pa_type=pa_type, copy=copy) arr = cls(pa_array) diff --git a/pandas/core/arrays/numeric.py b/pandas/core/arrays/numeric.py index 0e86c1efba17a..210450e868698 100644 --- a/pandas/core/arrays/numeric.py +++ b/pandas/core/arrays/numeric.py @@ -132,9 +132,12 @@ def _safe_cast(cls, values: np.ndarray, dtype: np.dtype, copy: bool) -> np.ndarr raise AbstractMethodError(cls) -def _coerce_to_data_and_mask(values, mask, dtype, copy, dtype_cls, default_dtype): +def _coerce_to_data_and_mask( + values, dtype, copy: bool, dtype_cls: type[NumericDtype], default_dtype: np.dtype +): checker = dtype_cls._checker + mask = None inferred_type = None if dtype is None and hasattr(values, "dtype"): @@ -190,7 +193,7 @@ def _coerce_to_data_and_mask(values, mask, dtype, copy, dtype_cls, default_dtype if dtype is None: dtype = default_dtype else: - dtype = dtype.type + dtype = dtype.numpy_dtype if is_integer_dtype(dtype) and values.dtype.kind == "f" and len(values) > 0: if mask.all(): @@ -260,9 +263,8 @@ def _coerce_to_array( ) -> tuple[np.ndarray, np.ndarray]: dtype_cls = cls._dtype_cls default_dtype = dtype_cls._default_np_dtype - mask = None values, mask, _, _ = _coerce_to_data_and_mask( - value, mask, dtype, copy, dtype_cls, default_dtype + value, dtype, copy, dtype_cls, default_dtype ) return values, mask diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index e3492dd21ea57..10641193a5cc1 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -1090,7 +1090,9 @@ def period_array( return PeriodArray(ordinals, dtype=dtype) data = ensure_object(arrdata) - + if freq is None: + freq = libperiod.extract_freq(data) + dtype = PeriodDtype(freq) return PeriodArray._from_sequence(data, dtype=dtype) diff --git a/pandas/core/construction.py b/pandas/core/construction.py index 8cb76e57eba7e..12a0486853f15 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -349,7 +349,9 @@ def array( elif inferred_dtype == "string": # StringArray/ArrowStringArray depending on pd.options.mode.string_storage - return StringDtype().construct_array_type()._from_sequence(data, copy=copy) + dtype = StringDtype() + cls = dtype.construct_array_type() + return cls._from_sequence(data, dtype=dtype, copy=copy) elif inferred_dtype == "integer": return IntegerArray._from_sequence(data, copy=copy) @@ -364,7 +366,7 @@ def array( return FloatingArray._from_sequence(data, copy=copy) elif inferred_dtype == "boolean": - return BooleanArray._from_sequence(data, copy=copy) + return BooleanArray._from_sequence(data, dtype="boolean", copy=copy) # Pandas overrides NumPy for # 1. datetime64[ns,us,ms,s] diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index e51983f0aabb7..aa78bbe1c3ec1 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -2330,7 +2330,7 @@ def hfunc(bvalues: ArrayLike) -> ArrayLike: elif isinstance(bvalues, ArrowExtensionArray) and not isinstance( bvalues.dtype, StringDtype ): - return type(bvalues)._from_sequence(counted[0]) + return type(bvalues)._from_sequence(counted[0], dtype="int64[pyarrow]") if is_series: assert counted.ndim == 2 assert counted.shape[0] == 1 diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 5dc4a85ba9792..18d6834e6191c 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -5194,12 +5194,12 @@ def _get_join_target(self) -> np.ndarray: def _from_join_target(self, result: np.ndarray) -> ArrayLike: """ Cast the ndarray returned from one of the libjoin.foo_indexer functions - back to type(self)._data. + back to type(self._data). """ if isinstance(self.values, BaseMaskedArray): return type(self.values)(result, np.zeros(result.shape, dtype=np.bool_)) elif isinstance(self.values, (ArrowExtensionArray, StringArray)): - return type(self.values)._from_sequence(result) + return type(self.values)._from_sequence(result, dtype=self.dtype) return result @doc(IndexOpsMixin._memory_usage) diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 57dd310f6b12c..609d2c9a7a285 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -1044,7 +1044,9 @@ def convert(arr): # i.e. maybe_convert_objects didn't convert arr = maybe_infer_to_datetimelike(arr) if dtype_backend != "numpy" and arr.dtype == np.dtype("O"): - arr = StringDtype().construct_array_type()._from_sequence(arr) + new_dtype = StringDtype() + arr_cls = new_dtype.construct_array_type() + arr = arr_cls._from_sequence(arr, dtype=new_dtype) elif dtype_backend != "numpy" and isinstance(arr, np.ndarray): if arr.dtype.kind in "iufb": arr = pd_array(arr, copy=False) diff --git a/pandas/core/strings/object_array.py b/pandas/core/strings/object_array.py index 6993ae3235943..0029beccc40a8 100644 --- a/pandas/core/strings/object_array.py +++ b/pandas/core/strings/object_array.py @@ -207,7 +207,7 @@ def rep(x, r): ) if isinstance(self, BaseStringArray): # Not going through map, so we have to do this here. - result = type(self)._from_sequence(result) + result = type(self)._from_sequence(result, dtype=self.dtype) return result def _str_match( diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py index 3ceb798a7f5ca..09f0f2af8e5c6 100644 --- a/pandas/io/parsers/base_parser.py +++ b/pandas/io/parsers/base_parser.py @@ -757,7 +757,9 @@ def _infer_types( elif result.dtype == np.object_ and non_default_dtype_backend: # read_excel sends array of datetime objects if not lib.is_datetime_array(result, skipna=True): - result = StringDtype().construct_array_type()._from_sequence(values) + dtype = StringDtype() + cls = dtype.construct_array_type() + result = cls._from_sequence(values, dtype=dtype) if dtype_backend == "pyarrow": pa = import_optional_dependency("pyarrow") diff --git a/pandas/tests/arithmetic/test_period.py b/pandas/tests/arithmetic/test_period.py index 88c633f5e747f..5535fe8ff928d 100644 --- a/pandas/tests/arithmetic/test_period.py +++ b/pandas/tests/arithmetic/test_period.py @@ -1282,7 +1282,7 @@ def test_parr_add_sub_td64_nat(self, box_with_array, transpose): "other", [ np.array(["NaT"] * 9, dtype="m8[ns]"), - TimedeltaArray._from_sequence(["NaT"] * 9), + TimedeltaArray._from_sequence(["NaT"] * 9, dtype="m8[ns]"), ], ) def test_parr_add_sub_tdt64_nat_array(self, box_with_array, other): diff --git a/pandas/tests/arrays/categorical/test_constructors.py b/pandas/tests/arrays/categorical/test_constructors.py index 50aaa42e09f22..36aba388f1b30 100644 --- a/pandas/tests/arrays/categorical/test_constructors.py +++ b/pandas/tests/arrays/categorical/test_constructors.py @@ -745,7 +745,9 @@ def test_interval(self): def test_categorical_extension_array_nullable(self, nulls_fixture): # GH: - arr = pd.arrays.StringArray._from_sequence([nulls_fixture] * 2) + arr = pd.arrays.StringArray._from_sequence( + [nulls_fixture] * 2, dtype=pd.StringDtype() + ) result = Categorical(arr) assert arr.dtype == result.categories.dtype expected = Categorical(Series([pd.NA, pd.NA], dtype=arr.dtype)) diff --git a/pandas/tests/arrays/datetimes/test_constructors.py b/pandas/tests/arrays/datetimes/test_constructors.py index 3509206cccb82..97fa6e8d529b7 100644 --- a/pandas/tests/arrays/datetimes/test_constructors.py +++ b/pandas/tests/arrays/datetimes/test_constructors.py @@ -14,7 +14,7 @@ class TestDatetimeArrayConstructor: def test_from_sequence_invalid_type(self): mi = pd.MultiIndex.from_product([np.arange(5), np.arange(5)]) with pytest.raises(TypeError, match="Cannot create a DatetimeArray"): - DatetimeArray._from_sequence(mi) + DatetimeArray._from_sequence(mi, dtype="M8[ns]") def test_only_1dim_accepted(self): arr = np.array([0, 1, 2, 3], dtype="M8[h]").astype("M8[ns]") @@ -66,7 +66,7 @@ def test_mixing_naive_tzaware_raises(self, meth): def test_from_pandas_array(self): arr = pd.array(np.arange(5, dtype=np.int64)) * 3600 * 10**9 - result = DatetimeArray._from_sequence(arr)._with_freq("infer") + result = DatetimeArray._from_sequence(arr, dtype="M8[ns]")._with_freq("infer") expected = pd.date_range("1970-01-01", periods=5, freq="h")._data tm.assert_datetime_array_equal(result, expected) @@ -100,7 +100,7 @@ def test_bool_dtype_raises(self): msg = r"dtype bool cannot be converted to datetime64\[ns\]" with pytest.raises(TypeError, match=msg): - DatetimeArray._from_sequence(arr) + DatetimeArray._from_sequence(arr, dtype="M8[ns]") with pytest.raises(TypeError, match=msg): pd.DatetimeIndex(arr) @@ -171,8 +171,10 @@ def test_2d(self, order): if order == "F": arr = arr.T - res = DatetimeArray._from_sequence(arr) - expected = DatetimeArray._from_sequence(arr.ravel()).reshape(arr.shape) + res = DatetimeArray._from_sequence(arr, dtype=dti.dtype) + expected = DatetimeArray._from_sequence(arr.ravel(), dtype=dti.dtype).reshape( + arr.shape + ) tm.assert_datetime_array_equal(res, expected) diff --git a/pandas/tests/arrays/datetimes/test_cumulative.py b/pandas/tests/arrays/datetimes/test_cumulative.py index 2d61dba212064..428d6b13d28ed 100644 --- a/pandas/tests/arrays/datetimes/test_cumulative.py +++ b/pandas/tests/arrays/datetimes/test_cumulative.py @@ -12,10 +12,11 @@ def test_accumulators_freq(self): "2000-01-01", "2000-01-02", "2000-01-03", - ] + ], + dtype="M8[ns]", )._with_freq("infer") result = arr._accumulate("cummin") - expected = DatetimeArray._from_sequence(["2000-01-01"] * 3) + expected = DatetimeArray._from_sequence(["2000-01-01"] * 3, dtype="M8[ns]") tm.assert_datetime_array_equal(result, expected) result = arr._accumulate("cummax") @@ -36,6 +37,7 @@ def test_accumulators_disallowed(self, func): "2000-01-01", "2000-01-02", ], + dtype="M8[ns]", )._with_freq("infer") with pytest.raises(TypeError, match=f"Accumulation {func}"): arr._accumulate(func) diff --git a/pandas/tests/arrays/datetimes/test_reductions.py b/pandas/tests/arrays/datetimes/test_reductions.py index 59a4443ac9e19..a941546b13a56 100644 --- a/pandas/tests/arrays/datetimes/test_reductions.py +++ b/pandas/tests/arrays/datetimes/test_reductions.py @@ -124,7 +124,7 @@ def test_median_2d(self, arr1d): # axis = 1 result = arr.median(axis=1) - expected = type(arr)._from_sequence([arr1d.median()]) + expected = type(arr)._from_sequence([arr1d.median()], dtype=arr.dtype) tm.assert_equal(result, expected) result = arr.median(axis=1, skipna=False) diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py index d015e899c4231..1532a20126853 100644 --- a/pandas/tests/arrays/string_/test_string.py +++ b/pandas/tests/arrays/string_/test_string.py @@ -64,14 +64,14 @@ def test_repr(dtype): assert repr(df.A.array) == expected -def test_none_to_nan(cls): - a = cls._from_sequence(["a", None, "b"]) +def test_none_to_nan(cls, dtype): + a = cls._from_sequence(["a", None, "b"], dtype=dtype) assert a[1] is not None assert a[1] is na_val(a.dtype) -def test_setitem_validates(cls): - arr = cls._from_sequence(["a", "b"]) +def test_setitem_validates(cls, dtype): + arr = cls._from_sequence(["a", "b"], dtype=dtype) if cls is pd.arrays.StringArray: msg = "Cannot set non-string value '10' into a StringArray." @@ -361,12 +361,12 @@ def test_constructor_nan_like(na): @pytest.mark.parametrize("copy", [True, False]) -def test_from_sequence_no_mutate(copy, cls, request): +def test_from_sequence_no_mutate(copy, cls, dtype): nan_arr = np.array(["a", np.nan], dtype=object) expected_input = nan_arr.copy() na_arr = np.array(["a", pd.NA], dtype=object) - result = cls._from_sequence(nan_arr, copy=copy) + result = cls._from_sequence(nan_arr, dtype=dtype, copy=copy) if cls in (ArrowStringArray, ArrowStringArrayNumpySemantics): import pyarrow as pa @@ -436,7 +436,7 @@ def test_reduce_missing(skipna, dtype): @pytest.mark.parametrize("method", ["min", "max"]) @pytest.mark.parametrize("skipna", [True, False]) -def test_min_max(method, skipna, dtype, request): +def test_min_max(method, skipna, dtype): arr = pd.Series(["a", "b", "c", None], dtype=dtype) result = getattr(arr, method)(skipna=skipna) if skipna: diff --git a/pandas/tests/arrays/string_/test_string_arrow.py b/pandas/tests/arrays/string_/test_string_arrow.py index a022dfffbdd2b..222b77cb4e94f 100644 --- a/pandas/tests/arrays/string_/test_string_arrow.py +++ b/pandas/tests/arrays/string_/test_string_arrow.py @@ -34,9 +34,8 @@ def test_config(string_storage, request, using_infer_string): result = pd.array(["a", "b"]) assert result.dtype.storage == string_storage - expected = ( - StringDtype(string_storage).construct_array_type()._from_sequence(["a", "b"]) - ) + dtype = StringDtype(string_storage) + expected = dtype.construct_array_type()._from_sequence(["a", "b"], dtype=dtype) tm.assert_equal(result, expected) diff --git a/pandas/tests/arrays/test_array.py b/pandas/tests/arrays/test_array.py index b0ec2787097f0..5d4aa54d1b4b9 100644 --- a/pandas/tests/arrays/test_array.py +++ b/pandas/tests/arrays/test_array.py @@ -47,8 +47,8 @@ def test_dt64_array(dtype_unit): "data, dtype, expected", [ # Basic NumPy defaults. - ([], None, FloatingArray._from_sequence([])), - ([1, 2], None, IntegerArray._from_sequence([1, 2])), + ([], None, FloatingArray._from_sequence([], dtype="Float64")), + ([1, 2], None, IntegerArray._from_sequence([1, 2], dtype="Int64")), ([1, 2], object, NumpyExtensionArray(np.array([1, 2], dtype=object))), ( [1, 2], @@ -64,7 +64,7 @@ def test_dt64_array(dtype_unit): ( np.array([1.0, 2.0], dtype="float64"), None, - FloatingArray._from_sequence([1.0, 2.0]), + FloatingArray._from_sequence([1.0, 2.0], dtype="Float64"), ), # String alias passes through to NumPy ([1, 2], "float32", NumpyExtensionArray(np.array([1, 2], dtype="float32"))), @@ -98,17 +98,23 @@ def test_dt64_array(dtype_unit): ( [1, 2], np.dtype("datetime64[ns]"), - DatetimeArray._from_sequence(np.array([1, 2], dtype="datetime64[ns]")), + DatetimeArray._from_sequence( + np.array([1, 2], dtype="M8[ns]"), dtype="M8[ns]" + ), ), ( [1, 2], np.dtype("datetime64[s]"), - DatetimeArray._from_sequence(np.array([1, 2], dtype="datetime64[s]")), + DatetimeArray._from_sequence( + np.array([1, 2], dtype="M8[s]"), dtype="M8[s]" + ), ), ( np.array([1, 2], dtype="datetime64[ns]"), None, - DatetimeArray._from_sequence(np.array([1, 2], dtype="datetime64[ns]")), + DatetimeArray._from_sequence( + np.array([1, 2], dtype="M8[ns]"), dtype="M8[ns]" + ), ), ( pd.DatetimeIndex(["2000", "2001"]), @@ -137,22 +143,24 @@ def test_dt64_array(dtype_unit): ( ["1h", "2h"], np.dtype("timedelta64[ns]"), - TimedeltaArray._from_sequence(["1h", "2h"]), + TimedeltaArray._from_sequence(["1h", "2h"], dtype="m8[ns]"), ), ( pd.TimedeltaIndex(["1h", "2h"]), np.dtype("timedelta64[ns]"), - TimedeltaArray._from_sequence(["1h", "2h"]), + TimedeltaArray._from_sequence(["1h", "2h"], dtype="m8[ns]"), ), ( np.array([1, 2], dtype="m8[s]"), np.dtype("timedelta64[s]"), - TimedeltaArray._from_sequence(np.array([1, 2], dtype="m8[s]")), + TimedeltaArray._from_sequence( + np.array([1, 2], dtype="m8[s]"), dtype="m8[s]" + ), ), ( pd.TimedeltaIndex(["1h", "2h"]), None, - TimedeltaArray._from_sequence(["1h", "2h"]), + TimedeltaArray._from_sequence(["1h", "2h"], dtype="m8[ns]"), ), ( # preserve non-nano, i.e. don't cast to NumpyExtensionArray @@ -200,16 +208,28 @@ def test_dt64_array(dtype_unit): ( ["a", None], "string", - pd.StringDtype().construct_array_type()._from_sequence(["a", None]), + pd.StringDtype() + .construct_array_type() + ._from_sequence(["a", None], dtype=pd.StringDtype()), ), ( ["a", None], pd.StringDtype(), - pd.StringDtype().construct_array_type()._from_sequence(["a", None]), + pd.StringDtype() + .construct_array_type() + ._from_sequence(["a", None], dtype=pd.StringDtype()), ), # Boolean - ([True, None], "boolean", BooleanArray._from_sequence([True, None])), - ([True, None], pd.BooleanDtype(), BooleanArray._from_sequence([True, None])), + ( + [True, None], + "boolean", + BooleanArray._from_sequence([True, None], dtype="boolean"), + ), + ( + [True, None], + pd.BooleanDtype(), + BooleanArray._from_sequence([True, None], dtype="boolean"), + ), # Index (pd.Index([1, 2]), None, NumpyExtensionArray(np.array([1, 2], dtype=np.int64))), # Series[EA] returns the EA @@ -416,7 +436,7 @@ def construct_array_type(cls): class DecimalArray2(DecimalArray): @classmethod - def _from_sequence(cls, scalars, dtype=None, copy=False): + def _from_sequence(cls, scalars, *, dtype=None, copy=False): if isinstance(scalars, (pd.Series, pd.Index)): raise TypeError("scalars should not be of type pd.Series or pd.Index") @@ -427,14 +447,15 @@ def test_array_unboxes(index_or_series): box = index_or_series data = box([decimal.Decimal("1"), decimal.Decimal("2")]) + dtype = DecimalDtype2() # make sure it works with pytest.raises( TypeError, match="scalars should not be of type pd.Series or pd.Index" ): - DecimalArray2._from_sequence(data) + DecimalArray2._from_sequence(data, dtype=dtype) result = pd.array(data, dtype="decimal2") - expected = DecimalArray2._from_sequence(data.values) + expected = DecimalArray2._from_sequence(data.values, dtype=dtype) tm.assert_equal(result, expected) diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index 1cc6161d538f2..25c09d9397ccc 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -1176,7 +1176,7 @@ def test_strftime_nat(self): ids=lambda x: type(x).__name__, ) def test_casting_nat_setitem_array(arr, casting_nats): - expected = type(arr)._from_sequence([NaT, arr[1], arr[2]]) + expected = type(arr)._from_sequence([NaT, arr[1], arr[2]], dtype=arr.dtype) for nat in casting_nats: arr = arr.copy() @@ -1314,8 +1314,8 @@ def test_from_pandas_array(dtype): expected = cls(data) tm.assert_extension_array_equal(result, expected) - result = cls._from_sequence(arr) - expected = cls._from_sequence(data) + result = cls._from_sequence(arr, dtype=dtype) + expected = cls._from_sequence(data, dtype=dtype) tm.assert_extension_array_equal(result, expected) func = {"M8[ns]": pd.to_datetime, "m8[ns]": pd.to_timedelta}[dtype] diff --git a/pandas/tests/arrays/test_datetimes.py b/pandas/tests/arrays/test_datetimes.py index 1fa55f13af2a8..13736700c1a6f 100644 --- a/pandas/tests/arrays/test_datetimes.py +++ b/pandas/tests/arrays/test_datetimes.py @@ -390,7 +390,9 @@ def test_astype_copies(self, dtype, other): @pytest.mark.parametrize("dtype", [int, np.int32, np.int64, "uint32", "uint64"]) def test_astype_int(self, dtype): - arr = DatetimeArray._from_sequence([pd.Timestamp("2000"), pd.Timestamp("2001")]) + arr = DatetimeArray._from_sequence( + [pd.Timestamp("2000"), pd.Timestamp("2001")], dtype="M8[ns]" + ) if np.dtype(dtype) != np.int64: with pytest.raises(TypeError, match=r"Do obj.astype\('int64'\)"): @@ -781,7 +783,7 @@ def test_date_range_frequency_M_Q_Y_A_deprecated(self, freq, freq_depr): def test_factorize_sort_without_freq(): - dta = DatetimeArray._from_sequence([0, 2, 1]) + dta = DatetimeArray._from_sequence([0, 2, 1], dtype="M8[ns]") msg = r"call pd.factorize\(obj, sort=True\) instead" with pytest.raises(NotImplementedError, match=msg): diff --git a/pandas/tests/arrays/test_timedeltas.py b/pandas/tests/arrays/test_timedeltas.py index 21bc85a4d070e..3f5ee328bdfcf 100644 --- a/pandas/tests/arrays/test_timedeltas.py +++ b/pandas/tests/arrays/test_timedeltas.py @@ -196,7 +196,9 @@ def test_add_timedeltaarraylike(self, tda): class TestTimedeltaArray: @pytest.mark.parametrize("dtype", [int, np.int32, np.int64, "uint32", "uint64"]) def test_astype_int(self, dtype): - arr = TimedeltaArray._from_sequence([Timedelta("1h"), Timedelta("2h")]) + arr = TimedeltaArray._from_sequence( + [Timedelta("1h"), Timedelta("2h")], dtype="m8[ns]" + ) if np.dtype(dtype) != np.int64: with pytest.raises(TypeError, match=r"Do obj.astype\('int64'\)"): diff --git a/pandas/tests/arrays/timedeltas/test_cumulative.py b/pandas/tests/arrays/timedeltas/test_cumulative.py index 2668ea673fa40..2d8fe65f807e4 100644 --- a/pandas/tests/arrays/timedeltas/test_cumulative.py +++ b/pandas/tests/arrays/timedeltas/test_cumulative.py @@ -7,13 +7,14 @@ class TestAccumulator: def test_accumulators_disallowed(self): # GH#50297 - arr = TimedeltaArray._from_sequence(["1D", "2D"]) + arr = TimedeltaArray._from_sequence(["1D", "2D"], dtype="m8[ns]") with pytest.raises(TypeError, match="cumprod not supported"): arr._accumulate("cumprod") - def test_cumsum(self): + def test_cumsum(self, unit): # GH#50297 - arr = TimedeltaArray._from_sequence(["1D", "2D"]) + dtype = f"m8[{unit}]" + arr = TimedeltaArray._from_sequence(["1D", "2D"], dtype=dtype) result = arr._accumulate("cumsum") - expected = TimedeltaArray._from_sequence(["1D", "3D"]) + expected = TimedeltaArray._from_sequence(["1D", "3D"], dtype=dtype) tm.assert_timedelta_array_equal(result, expected) diff --git a/pandas/tests/arrays/timedeltas/test_reductions.py b/pandas/tests/arrays/timedeltas/test_reductions.py index 3718e7e646ea9..f1d2cc6a90519 100644 --- a/pandas/tests/arrays/timedeltas/test_reductions.py +++ b/pandas/tests/arrays/timedeltas/test_reductions.py @@ -34,8 +34,11 @@ def test_sum_empty(self, skipna): assert isinstance(result, Timedelta) assert result == Timedelta(0) - def test_min_max(self): - arr = TimedeltaArray._from_sequence(["3h", "3h", "NaT", "2h", "5h", "4h"]) + def test_min_max(self, unit): + dtype = f"m8[{unit}]" + arr = TimedeltaArray._from_sequence( + ["3h", "3h", "NaT", "2h", "5h", "4h"], dtype=dtype + ) result = arr.min() expected = Timedelta("2h") diff --git a/pandas/tests/extension/array_with_attr/array.py b/pandas/tests/extension/array_with_attr/array.py index 4e40b6d0a714f..d0249d9af8098 100644 --- a/pandas/tests/extension/array_with_attr/array.py +++ b/pandas/tests/extension/array_with_attr/array.py @@ -48,7 +48,7 @@ def __init__(self, values, attr=None) -> None: self.attr = attr @classmethod - def _from_sequence(cls, scalars, dtype=None, copy=False): + def _from_sequence(cls, scalars, *, dtype=None, copy=False): data = np.array(scalars, dtype="float64", copy=copy) return cls(data) diff --git a/pandas/tests/extension/base/setitem.py b/pandas/tests/extension/base/setitem.py index 187da89729f0e..340b7df1772e9 100644 --- a/pandas/tests/extension/base/setitem.py +++ b/pandas/tests/extension/base/setitem.py @@ -73,7 +73,7 @@ def test_setitem_sequence_mismatched_length_raises(self, data, as_array): original = ser.copy() value = [data[0]] if as_array: - value = data._from_sequence(value) + value = data._from_sequence(value, dtype=data.dtype) xpr = "cannot set using a {} indexer with a different length" with pytest.raises(ValueError, match=xpr.format("list-like")): diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py index 9ce7ac309b6d3..521c1ff0b96bc 100644 --- a/pandas/tests/extension/decimal/array.py +++ b/pandas/tests/extension/decimal/array.py @@ -97,12 +97,14 @@ def dtype(self): return self._dtype @classmethod - def _from_sequence(cls, scalars, dtype=None, copy=False): + def _from_sequence(cls, scalars, *, dtype=None, copy=False): return cls(scalars) @classmethod def _from_sequence_of_strings(cls, strings, dtype=None, copy=False): - return cls._from_sequence([decimal.Decimal(x) for x in strings], dtype, copy) + return cls._from_sequence( + [decimal.Decimal(x) for x in strings], dtype=dtype, copy=copy + ) @classmethod def _from_factorized(cls, values, original): @@ -155,7 +157,7 @@ def reconstruct(x): if isinstance(x, (decimal.Decimal, numbers.Number)): return x else: - return DecimalArray._from_sequence(x) + return type(self)._from_sequence(x, dtype=self.dtype) if ufunc.nout > 1: return tuple(reconstruct(x) for x in result) @@ -178,7 +180,7 @@ def take(self, indexer, allow_fill=False, fill_value=None): fill_value = self.dtype.na_value result = take(data, indexer, fill_value=fill_value, allow_fill=allow_fill) - return self._from_sequence(result) + return self._from_sequence(result, dtype=self.dtype) def copy(self): return type(self)(self._data.copy(), dtype=self.dtype) diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py index 5ccffd1d25b3d..b3c57ee49a724 100644 --- a/pandas/tests/extension/decimal/test_decimal.py +++ b/pandas/tests/extension/decimal/test_decimal.py @@ -328,7 +328,7 @@ class DecimalArrayWithoutFromSequence(DecimalArray): """Helper class for testing error handling in _from_sequence.""" @classmethod - def _from_sequence(cls, scalars, dtype=None, copy=False): + def _from_sequence(cls, scalars, *, dtype=None, copy=False): raise KeyError("For the test") diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py index 05472eb709190..d3d9dcc4a4712 100644 --- a/pandas/tests/extension/json/array.py +++ b/pandas/tests/extension/json/array.py @@ -83,7 +83,7 @@ def __init__(self, values, dtype=None, copy=False) -> None: # self._values = self.values = self.data @classmethod - def _from_sequence(cls, scalars, dtype=None, copy=False): + def _from_sequence(cls, scalars, *, dtype=None, copy=False): return cls(scalars) @classmethod @@ -112,7 +112,9 @@ def __getitem__(self, item): else: item = pd.api.indexers.check_array_indexer(self, item) if is_bool_dtype(item.dtype): - return self._from_sequence([x for x, m in zip(self, item) if m]) + return type(self)._from_sequence( + [x for x, m in zip(self, item) if m], dtype=self.dtype + ) # integer return type(self)([self.data[i] for i in item]) @@ -187,7 +189,7 @@ def take(self, indexer, allow_fill=False, fill_value=None): except IndexError as err: raise IndexError(msg) from err - return self._from_sequence(output) + return type(self)._from_sequence(output, dtype=self.dtype) def copy(self): return type(self)(self.data[:]) @@ -206,7 +208,8 @@ def astype(self, dtype, copy=True): return self elif isinstance(dtype, StringDtype): value = self.astype(str) # numpy doesn't like nested dicts - return dtype.construct_array_type()._from_sequence(value, copy=False) + arr_cls = dtype.construct_array_type() + return arr_cls._from_sequence(value, dtype=dtype, copy=False) return np.array([dict(x) for x in self], dtype=dtype, copy=copy) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 75e5fb00586e6..47cd3a51f664b 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -1575,7 +1575,7 @@ def test_setitem_null_slice(data): result[:] = data[0] expected = ArrowExtensionArray._from_sequence( [data[0]] * len(data), - dtype=data._pa_array.type, + dtype=data.dtype, ) tm.assert_extension_array_equal(result, expected) diff --git a/pandas/tests/extension/test_string.py b/pandas/tests/extension/test_string.py index 8c5098a53adfc..bf6db6e9f16ec 100644 --- a/pandas/tests/extension/test_string.py +++ b/pandas/tests/extension/test_string.py @@ -59,33 +59,33 @@ def data(dtype, chunked): while strings[0] == strings[1]: strings = np.random.default_rng(2).choice(list(string.ascii_letters), size=100) - arr = dtype.construct_array_type()._from_sequence(strings) + arr = dtype.construct_array_type()._from_sequence(strings, dtype=dtype) return maybe_split_array(arr, chunked) @pytest.fixture def data_missing(dtype, chunked): """Length 2 array with [NA, Valid]""" - arr = dtype.construct_array_type()._from_sequence([pd.NA, "A"]) + arr = dtype.construct_array_type()._from_sequence([pd.NA, "A"], dtype=dtype) return maybe_split_array(arr, chunked) @pytest.fixture def data_for_sorting(dtype, chunked): - arr = dtype.construct_array_type()._from_sequence(["B", "C", "A"]) + arr = dtype.construct_array_type()._from_sequence(["B", "C", "A"], dtype=dtype) return maybe_split_array(arr, chunked) @pytest.fixture def data_missing_for_sorting(dtype, chunked): - arr = dtype.construct_array_type()._from_sequence(["B", pd.NA, "A"]) + arr = dtype.construct_array_type()._from_sequence(["B", pd.NA, "A"], dtype=dtype) return maybe_split_array(arr, chunked) @pytest.fixture def data_for_grouping(dtype, chunked): arr = dtype.construct_array_type()._from_sequence( - ["B", "B", pd.NA, pd.NA, "A", "A", "B", "C"] + ["B", "B", pd.NA, pd.NA, "A", "A", "B", "C"], dtype=dtype ) return maybe_split_array(arr, chunked) diff --git a/pandas/tests/indexes/period/test_constructors.py b/pandas/tests/indexes/period/test_constructors.py index d53581fab40c7..387dc47c48d20 100644 --- a/pandas/tests/indexes/period/test_constructors.py +++ b/pandas/tests/indexes/period/test_constructors.py @@ -569,11 +569,14 @@ def test_mixed_freq_raises(self): vals = [end_intv, Period("2006-12-31", "w")] msg = r"Input has different freq=W-SUN from PeriodIndex\(freq=B\)" + depr_msg = r"PeriodDtype\[B\] is deprecated" with pytest.raises(IncompatibleFrequency, match=msg): - PeriodIndex(vals) + with tm.assert_produces_warning(FutureWarning, match=depr_msg): + PeriodIndex(vals) vals = np.array(vals) with pytest.raises(IncompatibleFrequency, match=msg): - PeriodIndex(vals) + with tm.assert_produces_warning(FutureWarning, match=depr_msg): + PeriodIndex(vals) @pytest.mark.parametrize( "freq", ["M", "Q", "Y", "D", "B", "min", "s", "ms", "us", "ns", "h"] diff --git a/pandas/tests/scalar/test_nat.py b/pandas/tests/scalar/test_nat.py index 3eaf21a2eee26..cb046e0133245 100644 --- a/pandas/tests/scalar/test_nat.py +++ b/pandas/tests/scalar/test_nat.py @@ -442,7 +442,7 @@ def test_nat_rfloordiv_timedelta(val, expected): [ DatetimeIndex(["2011-01-01", "2011-01-02"], name="x"), DatetimeIndex(["2011-01-01", "2011-01-02"], tz="US/Eastern", name="x"), - DatetimeArray._from_sequence(["2011-01-01", "2011-01-02"]), + DatetimeArray._from_sequence(["2011-01-01", "2011-01-02"], dtype="M8[ns]"), DatetimeArray._from_sequence( ["2011-01-01", "2011-01-02"], dtype=DatetimeTZDtype(tz="US/Pacific") ), diff --git a/pandas/tests/test_downstream.py b/pandas/tests/test_downstream.py index c2e39dc38d5ff..898a027255190 100644 --- a/pandas/tests/test_downstream.py +++ b/pandas/tests/test_downstream.py @@ -305,7 +305,7 @@ def test_from_obscure_array(dtype, array_likes): cls = {"M8[ns]": DatetimeArray, "m8[ns]": TimedeltaArray}[dtype] expected = cls(arr) - result = cls._from_sequence(data) + result = cls._from_sequence(data, dtype=dtype) tm.assert_extension_array_equal(result, expected) if not isinstance(data, memoryview):