Skip to content

Commit 14cf864

Browse files
authored
REF: pass dtype to _from_sequence (#56436)
* REF: pass dtype to _from_sequence
* mypy fixup
* mypy fixup
* restore but better type safety
1 parent 2a02b00 commit 14cf864

33 files changed

+143
-84
lines changed

pandas/_libs/parsers.pyx

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1471,7 +1471,9 @@ def _maybe_upcast(
14711471

14721472
elif arr.dtype == np.object_:
14731473
if use_dtype_backend:
1474-
arr = StringDtype().construct_array_type()._from_sequence(arr)
1474+
dtype = StringDtype()
1475+
cls = dtype.construct_array_type()
1476+
arr = cls._from_sequence(arr, dtype=dtype)
14751477

14761478
if use_dtype_backend and dtype_backend == "pyarrow":
14771479
import pyarrow as pa

pandas/core/arrays/arrow/array.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
is_integer,
4141
is_list_like,
4242
is_scalar,
43+
pandas_dtype,
4344
)
4445
from pandas.core.dtypes.dtypes import DatetimeTZDtype
4546
from pandas.core.dtypes.missing import isna
@@ -273,6 +274,10 @@ def _from_sequence(cls, scalars, *, dtype: Dtype | None = None, copy: bool = Fal
273274
"""
274275
Construct a new ExtensionArray from a sequence of scalars.
275276
"""
277+
if dtype is not None and isinstance(dtype, str):
278+
# FIXME: in tests.extension.test_arrow we pass pyarrow _type_ objects
279+
# which raise when passed to pandas_dtype
280+
dtype = pandas_dtype(dtype)
276281
pa_type = to_pyarrow_type(dtype)
277282
pa_array = cls._box_pa_array(scalars, pa_type=pa_type, copy=copy)
278283
arr = cls(pa_array)

pandas/core/arrays/numeric.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -132,9 +132,12 @@ def _safe_cast(cls, values: np.ndarray, dtype: np.dtype, copy: bool) -> np.ndarr
132132
raise AbstractMethodError(cls)
133133

134134

135-
def _coerce_to_data_and_mask(values, mask, dtype, copy, dtype_cls, default_dtype):
135+
def _coerce_to_data_and_mask(
136+
values, dtype, copy: bool, dtype_cls: type[NumericDtype], default_dtype: np.dtype
137+
):
136138
checker = dtype_cls._checker
137139

140+
mask = None
138141
inferred_type = None
139142

140143
if dtype is None and hasattr(values, "dtype"):
@@ -190,7 +193,7 @@ def _coerce_to_data_and_mask(values, mask, dtype, copy, dtype_cls, default_dtype
190193
if dtype is None:
191194
dtype = default_dtype
192195
else:
193-
dtype = dtype.type
196+
dtype = dtype.numpy_dtype
194197

195198
if is_integer_dtype(dtype) and values.dtype.kind == "f" and len(values) > 0:
196199
if mask.all():
@@ -260,9 +263,8 @@ def _coerce_to_array(
260263
) -> tuple[np.ndarray, np.ndarray]:
261264
dtype_cls = cls._dtype_cls
262265
default_dtype = dtype_cls._default_np_dtype
263-
mask = None
264266
values, mask, _, _ = _coerce_to_data_and_mask(
265-
value, mask, dtype, copy, dtype_cls, default_dtype
267+
value, dtype, copy, dtype_cls, default_dtype
266268
)
267269
return values, mask
268270

pandas/core/arrays/period.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1090,7 +1090,9 @@ def period_array(
10901090
return PeriodArray(ordinals, dtype=dtype)
10911091

10921092
data = ensure_object(arrdata)
1093-
1093+
if freq is None:
1094+
freq = libperiod.extract_freq(data)
1095+
dtype = PeriodDtype(freq)
10941096
return PeriodArray._from_sequence(data, dtype=dtype)
10951097

10961098

pandas/core/construction.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -349,7 +349,9 @@ def array(
349349

350350
elif inferred_dtype == "string":
351351
# StringArray/ArrowStringArray depending on pd.options.mode.string_storage
352-
return StringDtype().construct_array_type()._from_sequence(data, copy=copy)
352+
dtype = StringDtype()
353+
cls = dtype.construct_array_type()
354+
return cls._from_sequence(data, dtype=dtype, copy=copy)
353355

354356
elif inferred_dtype == "integer":
355357
return IntegerArray._from_sequence(data, copy=copy)
@@ -364,7 +366,7 @@ def array(
364366
return FloatingArray._from_sequence(data, copy=copy)
365367

366368
elif inferred_dtype == "boolean":
367-
return BooleanArray._from_sequence(data, copy=copy)
369+
return BooleanArray._from_sequence(data, dtype="boolean", copy=copy)
368370

369371
# Pandas overrides NumPy for
370372
# 1. datetime64[ns,us,ms,s]

pandas/core/groupby/groupby.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2330,7 +2330,7 @@ def hfunc(bvalues: ArrayLike) -> ArrayLike:
23302330
elif isinstance(bvalues, ArrowExtensionArray) and not isinstance(
23312331
bvalues.dtype, StringDtype
23322332
):
2333-
return type(bvalues)._from_sequence(counted[0])
2333+
return type(bvalues)._from_sequence(counted[0], dtype="int64[pyarrow]")
23342334
if is_series:
23352335
assert counted.ndim == 2
23362336
assert counted.shape[0] == 1

pandas/core/indexes/base.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5194,12 +5194,12 @@ def _get_join_target(self) -> np.ndarray:
51945194
def _from_join_target(self, result: np.ndarray) -> ArrayLike:
51955195
"""
51965196
Cast the ndarray returned from one of the libjoin.foo_indexer functions
5197-
back to type(self)._data.
5197+
back to type(self._data).
51985198
"""
51995199
if isinstance(self.values, BaseMaskedArray):
52005200
return type(self.values)(result, np.zeros(result.shape, dtype=np.bool_))
52015201
elif isinstance(self.values, (ArrowExtensionArray, StringArray)):
5202-
return type(self.values)._from_sequence(result)
5202+
return type(self.values)._from_sequence(result, dtype=self.dtype)
52035203
return result
52045204

52055205
@doc(IndexOpsMixin._memory_usage)

pandas/core/internals/construction.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1044,7 +1044,9 @@ def convert(arr):
10441044
# i.e. maybe_convert_objects didn't convert
10451045
arr = maybe_infer_to_datetimelike(arr)
10461046
if dtype_backend != "numpy" and arr.dtype == np.dtype("O"):
1047-
arr = StringDtype().construct_array_type()._from_sequence(arr)
1047+
new_dtype = StringDtype()
1048+
arr_cls = new_dtype.construct_array_type()
1049+
arr = arr_cls._from_sequence(arr, dtype=new_dtype)
10481050
elif dtype_backend != "numpy" and isinstance(arr, np.ndarray):
10491051
if arr.dtype.kind in "iufb":
10501052
arr = pd_array(arr, copy=False)

pandas/core/strings/object_array.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -207,7 +207,7 @@ def rep(x, r):
207207
)
208208
if isinstance(self, BaseStringArray):
209209
# Not going through map, so we have to do this here.
210-
result = type(self)._from_sequence(result)
210+
result = type(self)._from_sequence(result, dtype=self.dtype)
211211
return result
212212

213213
def _str_match(

pandas/io/parsers/base_parser.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -757,7 +757,9 @@ def _infer_types(
757757
elif result.dtype == np.object_ and non_default_dtype_backend:
758758
# read_excel sends array of datetime objects
759759
if not lib.is_datetime_array(result, skipna=True):
760-
result = StringDtype().construct_array_type()._from_sequence(values)
760+
dtype = StringDtype()
761+
cls = dtype.construct_array_type()
762+
result = cls._from_sequence(values, dtype=dtype)
761763

762764
if dtype_backend == "pyarrow":
763765
pa = import_optional_dependency("pyarrow")

0 commit comments

Comments
 (0)