From 946dd787f9076ffdbfd2a0d3571559005207dea6 Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Tue, 22 Jul 2025 15:41:51 +0200 Subject: [PATCH 01/26] fix: keep dtype as `object` for `pd.StringDtype` in `safe_cast_to_index` --- pyproject.toml | 1 + xarray/core/indexes.py | 2 ++ 2 files changed, 3 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 5e5fd00328b..c7e1e04d85f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,6 +33,7 @@ accel = [ "numba>=0.59", "flox>=0.9", "opt_einsum", + "numpy<2.3", # numba has not updated yet: https://github.com/numba/numba/issues/10105 ] complete = ["xarray[accel,etc,io,parallel,viz]"] io = [ diff --git a/xarray/core/indexes.py b/xarray/core/indexes.py index c233c6911e4..1fe36956fc1 100644 --- a/xarray/core/indexes.py +++ b/xarray/core/indexes.py @@ -536,6 +536,8 @@ def safe_cast_to_index(array: Any) -> pd.Index: kwargs["dtype"] = "float64" index = pd.Index(to_numpy(array), **kwargs) + if isinstance(index.dtype, pd.StringDtype): + index = index.astype("O") return _maybe_cast_to_cftimeindex(index) From 147e3a7216a632a3520fe9596ece382465e245f4 Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Tue, 22 Jul 2025 15:42:40 +0200 Subject: [PATCH 02/26] chore: comment --- xarray/core/indexes.py | 1 + 1 file changed, 1 insertion(+) diff --git a/xarray/core/indexes.py b/xarray/core/indexes.py index 1fe36956fc1..df7cce3d6e7 100644 --- a/xarray/core/indexes.py +++ b/xarray/core/indexes.py @@ -536,6 +536,7 @@ def safe_cast_to_index(array: Any) -> pd.Index: kwargs["dtype"] = "float64" index = pd.Index(to_numpy(array), **kwargs) + # See https://github.com/pydata/xarray/issues/10553 if isinstance(index.dtype, pd.StringDtype): index = index.astype("O") From 3cf92dd966733fdd9eba904c98b01510c68b5bfa Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Wed, 23 Jul 2025 11:58:48 +0200 Subject: [PATCH 03/26] fix: broader fix --- xarray/core/extension_array.py | 6 +++--- xarray/core/indexes.py | 7 ++----- xarray/core/indexing.py | 13 ++++++------- 
xarray/core/utils.py | 14 ++++++++++++++ xarray/core/variable.py | 8 ++++++-- xarray/tests/test_pandas_to_xarray.py | 5 ++++- 6 files changed, 35 insertions(+), 18 deletions(-) diff --git a/xarray/core/extension_array.py b/xarray/core/extension_array.py index d85f7e66b55..b752f7141af 100644 --- a/xarray/core/extension_array.py +++ b/xarray/core/extension_array.py @@ -11,7 +11,7 @@ from pandas.api.types import is_extension_array_dtype from xarray.core.types import DTypeLikeSave, T_ExtensionArray -from xarray.core.utils import NDArrayMixin +from xarray.core.utils import NDArrayMixin, is_allowed_extension_array HANDLED_EXTENSION_ARRAY_FUNCTIONS: dict[Callable, Callable] = {} @@ -101,9 +101,9 @@ def __post_init__(self): # This does not use the UNSUPPORTED_EXTENSION_ARRAY_TYPES whitelist because # we do support extension arrays from datetime, for example, that need # duck array support internally via this class. - if isinstance(self.array, pd.arrays.NumpyExtensionArray): + if not is_allowed_extension_array(self.array): raise TypeError( - "`NumpyExtensionArray` should be converted to a numpy array in `xarray` internally." + "`NumpyExtensionArray` or string dtype should be converted to a numpy array in `xarray` internally." 
) def __array_function__(self, func, types, args, kwargs): diff --git a/xarray/core/indexes.py b/xarray/core/indexes.py index df7cce3d6e7..d22fc37aa4f 100644 --- a/xarray/core/indexes.py +++ b/xarray/core/indexes.py @@ -23,6 +23,7 @@ Frozen, emit_user_level_warning, get_valid_numpy_dtype, + is_allowed_extension_array_dtype, is_dict_like, is_scalar, ) @@ -536,9 +537,6 @@ def safe_cast_to_index(array: Any) -> pd.Index: kwargs["dtype"] = "float64" index = pd.Index(to_numpy(array), **kwargs) - # See https://github.com/pydata/xarray/issues/10553 - if isinstance(index.dtype, pd.StringDtype): - index = index.astype("O") return _maybe_cast_to_cftimeindex(index) @@ -669,9 +667,8 @@ def __init__( self.index = index self.dim = dim - if coord_dtype is None: - if pd.api.types.is_extension_array_dtype(index.dtype): + if is_allowed_extension_array_dtype(index.dtype): cast(pd.api.extensions.ExtensionDtype, index.dtype) coord_dtype = index.dtype else: diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index 8e4458fb88f..c98175578f8 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -24,6 +24,8 @@ NDArrayMixin, either_dict_or_kwargs, get_valid_numpy_dtype, + is_allowed_extension_array, + is_allowed_extension_array_dtype, is_duck_array, is_duck_dask_array, is_scalar, @@ -1763,12 +1765,12 @@ def __init__( self.array = safe_cast_to_index(array) if dtype is None: - if pd.api.types.is_extension_array_dtype(array.dtype): + if is_allowed_extension_array(array): cast(pd.api.extensions.ExtensionDtype, array.dtype) self._dtype = array.dtype else: self._dtype = get_valid_numpy_dtype(array) - elif pd.api.types.is_extension_array_dtype(dtype): + elif is_allowed_extension_array_dtype(dtype): self._dtype = cast(pd.api.extensions.ExtensionDtype, dtype) else: self._dtype = np.dtype(cast(DTypeLike, dtype)) @@ -1816,10 +1818,7 @@ def get_duck_array(self) -> np.ndarray | PandasExtensionArray: # We return an PandasExtensionArray wrapper type that satisfies # duck array 
protocols. # `NumpyExtensionArray` is excluded - if pd.api.types.is_extension_array_dtype(self.array) and not isinstance( - self.array.array, - pd.arrays.NumpyExtensionArray, # type: ignore[attr-defined] - ): + if is_allowed_extension_array(self.array): from xarray.core.extension_array import PandasExtensionArray return PandasExtensionArray(self.array.array) @@ -1916,7 +1915,7 @@ def copy(self, deep: bool = True) -> Self: @property def nbytes(self) -> int: - if pd.api.types.is_extension_array_dtype(self.dtype): + if is_allowed_extension_array(self.array): return self.array.nbytes dtype = self._get_numpy_dtype() diff --git a/xarray/core/utils.py b/xarray/core/utils.py index 562706a1ac0..6f6cfe18c9d 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -104,6 +104,20 @@ T = TypeVar("T") +def is_allowed_extension_array_dtype(dtype: Any): + return pd.api.types.is_extension_array_dtype(dtype) and not isinstance( + dtype, pd.StringDtype + ) + + +def is_allowed_extension_array(array: Any): + return ( + hasattr(array, "dtype") + and is_allowed_extension_array_dtype(array.dtype) + and not isinstance(array, pd.arrays.NumpyExtensionArray) + ) + + def alias_message(old_name: str, new_name: str) -> str: return f"{old_name} has been deprecated. Use {new_name} instead." 
diff --git a/xarray/core/variable.py b/xarray/core/variable.py index bcc2ca4e460..325dde57e1d 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -40,6 +40,7 @@ emit_user_level_warning, ensure_us_time_resolution, infix_dims, + is_allowed_extension_array, is_dict_like, is_duck_array, is_duck_dask_array, @@ -198,7 +199,9 @@ def _maybe_wrap_data(data): return PandasIndexingAdapter(data) if isinstance(data, UNSUPPORTED_EXTENSION_ARRAY_TYPES): return data.to_numpy() - if isinstance(data, pd.api.extensions.ExtensionArray): + if isinstance( + data, pd.api.extensions.ExtensionArray + ) and is_allowed_extension_array(data): return PandasExtensionArray(data) return data @@ -261,7 +264,8 @@ def convert_non_numpy_type(data): if isinstance(data, pd.Series | pd.DataFrame): if ( isinstance(data, pd.Series) - and pd.api.types.is_extension_array_dtype(data) + and is_allowed_extension_array(data.array) + # Some datetime types are not allowed as well as backing Variable types and not isinstance(data.array, UNSUPPORTED_EXTENSION_ARRAY_TYPES) ): pandas_data = data.array diff --git a/xarray/tests/test_pandas_to_xarray.py b/xarray/tests/test_pandas_to_xarray.py index 111866541eb..8346f5abe21 100644 --- a/xarray/tests/test_pandas_to_xarray.py +++ b/xarray/tests/test_pandas_to_xarray.py @@ -37,6 +37,7 @@ import pandas as pd import pandas._testing as tm import pytest +from packaging.version import Version from pandas import ( Categorical, CategoricalIndex, @@ -171,7 +172,9 @@ def test_to_xarray_with_multiindex(self, df): result = result.to_dataframe() expected = df.copy() - expected["f"] = expected["f"].astype(object) + expected["f"] = expected["f"].astype( + object if Version(pd.__version__) < Version("3.0.0dev0") else str + ) expected.columns.name = None tm.assert_frame_equal(result, expected) From 287e4bef974f20ce00faaee69d2f12e059b0714b Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Wed, 23 Jul 2025 12:22:07 +0200 Subject: [PATCH 04/26] feat: ban use of 
`pd.api.types.is_extension_array_dtype` --- pyproject.toml | 2 ++ xarray/core/dataset.py | 10 +++++----- xarray/core/dtypes.py | 4 ++-- xarray/core/duck_array_ops.py | 9 +++++---- xarray/core/extension_array.py | 5 ++--- xarray/core/utils.py | 2 +- xarray/tests/test_variable.py | 2 +- 7 files changed, 18 insertions(+), 16 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index c7e1e04d85f..d4901f4d78b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -325,6 +325,8 @@ known-first-party = ["xarray"] [tool.ruff.lint.flake8-tidy-imports] # Disallow all relative imports. ban-relative-imports = "all" +[tool.ruff.lint.flake8-tidy-imports.banned-api] +"pandas.api.types.is_extension_array_dtype".msg = "Use xarray.core.utils.is_allowed_extension_array{_dtype} instead. Only use the banend API if the incoming data has already been sanitized by xarray" [tool.pytest.ini_options] addopts = [ diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 26db282c3df..46062c65a62 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -26,7 +26,6 @@ import numpy as np import pandas as pd -from pandas.api.types import is_extension_array_dtype from xarray.coding.calendar_ops import convert_calendar, interp_calendar from xarray.coding.cftimeindex import CFTimeIndex, _parse_array_of_cftime_strings @@ -91,6 +90,7 @@ either_dict_or_kwargs, emit_user_level_warning, infix_dims, + is_allowed_extension_array, is_dict_like, is_duck_array, is_duck_dask_array, @@ -6771,7 +6771,7 @@ def reduce( elif ( # Some reduction functions (e.g. 
std, var) need to run on variables # that don't have the reduce dims: PR5393 - not is_extension_array_dtype(var.dtype) + not pd.api.types.is_extension_array_dtype(var.dtype) # noqa: TID251 and ( not reduce_dims or not numeric_only @@ -7096,12 +7096,12 @@ def _to_dataframe(self, ordered_dims: Mapping[Any, int]): non_extension_array_columns = [ k for k in columns_in_order - if not is_extension_array_dtype(self.variables[k].data) + if not pd.api.types.is_extension_array_dtype(self.variables[k].data) # noqa: TID251 ] extension_array_columns = [ k for k in columns_in_order - if is_extension_array_dtype(self.variables[k].data) + if pd.api.types.is_extension_array_dtype(self.variables[k].data) # noqa: TID251 ] extension_array_columns_different_index = [ k @@ -7293,7 +7293,7 @@ def from_dataframe(cls, dataframe: pd.DataFrame, sparse: bool = False) -> Self: arrays = [] extension_arrays = [] for k, v in dataframe.items(): - if not is_extension_array_dtype(v) or isinstance( + if not is_allowed_extension_array(v) or isinstance( v.array, UNSUPPORTED_EXTENSION_ARRAY_TYPES ): arrays.append((k, np.asarray(v))) diff --git a/xarray/core/dtypes.py b/xarray/core/dtypes.py index c959a7f2536..0a7b1722877 100644 --- a/xarray/core/dtypes.py +++ b/xarray/core/dtypes.py @@ -4,7 +4,7 @@ from typing import Any import numpy as np -from pandas.api.types import is_extension_array_dtype +import pandas as pd from xarray.compat import array_api_compat, npcompat from xarray.compat.npcompat import HAS_STRING_DTYPE @@ -213,7 +213,7 @@ def isdtype(dtype, kind: str | tuple[str, ...], xp=None) -> bool: if isinstance(dtype, np.dtype): return npcompat.isdtype(dtype, kind) - elif is_extension_array_dtype(dtype): + elif pd.api.types.is_extension_array_dtype(dtype): # noqa: TID251 # we never want to match pandas extension array dtypes return False else: diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index 0c7d40113d6..7687e452496 100644 --- a/xarray/core/duck_array_ops.py +++ 
b/xarray/core/duck_array_ops.py @@ -23,7 +23,6 @@ take, unravel_index, # noqa: F401 ) -from pandas.api.types import is_extension_array_dtype from xarray.compat import dask_array_compat, dask_array_ops from xarray.compat.array_api_compat import get_array_namespace @@ -184,7 +183,7 @@ def isnull(data): dtype = xp.bool_ if hasattr(xp, "bool_") else xp.bool return full_like(data, dtype=dtype, fill_value=False) # at this point, array should have dtype=object - elif isinstance(data, np.ndarray) or is_extension_array_dtype(data): + elif isinstance(data, np.ndarray) or pd.api.types.is_extension_array_dtype(data): # noqa: TID251 return pandas_isnull(data) else: # Not reachable yet, but intended for use with other duck array @@ -266,9 +265,11 @@ def asarray(data, xp=np, dtype=None): def as_shared_dtype(scalars_or_arrays, xp=None): """Cast arrays to a shared dtype using xarray's type promotion rules.""" - if any(is_extension_array_dtype(x) for x in scalars_or_arrays): + if any(pd.api.types.is_extension_array_dtype(x) for x in scalars_or_arrays): # noqa: TID251 extension_array_types = [ - x.dtype for x in scalars_or_arrays if is_extension_array_dtype(x) + x.dtype + for x in scalars_or_arrays + if pd.api.types.is_extension_array_dtype(x) # noqa: TID251 ] non_nans = [x for x in scalars_or_arrays if not isna(x)] if len(extension_array_types) == len(non_nans) and all( diff --git a/xarray/core/extension_array.py b/xarray/core/extension_array.py index b752f7141af..cb7499d91c3 100644 --- a/xarray/core/extension_array.py +++ b/xarray/core/extension_array.py @@ -8,7 +8,6 @@ import numpy as np import pandas as pd from packaging.version import Version -from pandas.api.types import is_extension_array_dtype from xarray.core.types import DTypeLikeSave, T_ExtensionArray from xarray.core.utils import NDArrayMixin, is_allowed_extension_array @@ -126,7 +125,7 @@ def replace_duck_with_extension_array(args) -> list: if func not in HANDLED_EXTENSION_ARRAY_FUNCTIONS: raise KeyError("Function not 
registered for pandas extension arrays.") res = HANDLED_EXTENSION_ARRAY_FUNCTIONS[func](*args, **kwargs) - if is_extension_array_dtype(res): + if pd.api.types.is_extension_array_dtype(res): # noqa: TID251 return PandasExtensionArray(res) return res @@ -135,7 +134,7 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): def __getitem__(self, key) -> PandasExtensionArray[T_ExtensionArray]: item = self.array[key] - if is_extension_array_dtype(item): + if pd.api.types.is_extension_array_dtype(item): # noqa: TID251 return PandasExtensionArray(item) if np.isscalar(item) or isinstance(key, int): return PandasExtensionArray(type(self.array)._from_sequence([item])) # type: ignore[call-arg,attr-defined,unused-ignore] diff --git a/xarray/core/utils.py b/xarray/core/utils.py index 6f6cfe18c9d..f51a73fb32b 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -105,7 +105,7 @@ def is_allowed_extension_array_dtype(dtype: Any): - return pd.api.types.is_extension_array_dtype(dtype) and not isinstance( + return pd.api.types.is_extension_array_dtype(dtype) and not isinstance( # noqa: TID251 dtype, pd.StringDtype ) diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index 2f67e97522c..e2f4a3154f3 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -1594,7 +1594,7 @@ def test_pandas_categorical_dtype(self): data = pd.Categorical(np.arange(10, dtype="int64")) v = self.cls("x", data) print(v) # should not error - assert pd.api.types.is_extension_array_dtype(v.dtype) + assert isinstance(v.dtype, pd.CategoricalDtype) def test_squeeze(self): v = Variable(["x", "y"], [[1]]) From ffc905d1a10f19c2cc3206950e867e8f09dcb90d Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Wed, 23 Jul 2025 13:33:26 +0200 Subject: [PATCH 05/26] fix: type ignore --- xarray/core/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/utils.py b/xarray/core/utils.py index f51a73fb32b..2de57b6d213 100644 --- 
a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -114,7 +114,7 @@ def is_allowed_extension_array(array: Any): return ( hasattr(array, "dtype") and is_allowed_extension_array_dtype(array.dtype) - and not isinstance(array, pd.arrays.NumpyExtensionArray) + and not isinstance(array, pd.arrays.NumpyExtensionArray) # type: ignore[attr-defined] ) From aa68745ace8227bede2a036eb61930fc26e39dd5 Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Thu, 24 Jul 2025 11:31:56 +0200 Subject: [PATCH 06/26] fix: `pd.Series` in `pandas>=3` does not preserve object dtype metadata [test-upstream] --- xarray/core/variable.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 325dde57e1d..eff332539ab 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -13,6 +13,7 @@ import numpy as np import pandas as pd from numpy.typing import ArrayLike +from packaging.version import Version import xarray as xr # only for Dataset and DataArray from xarray.compat.array_api_compat import to_like_array @@ -217,6 +218,14 @@ def _possibly_convert_objects(values): """ as_series = pd.Series(values.ravel(), copy=False) result = np.asarray(as_series).reshape(values.shape) + # FIXME: Why does pd.Series no longer preserve data type metadata for object dtype? 
+ if ( + result.dtype.kind == "O" + and values.dtype.kind == "O" + and Version(pd.__version__) >= Version("3.0.0dev0") + ): + result = np.asarray(as_series, copy=True).reshape(values.shape) + result.dtype = values.dtype if not result.flags.writeable: # GH8843, pandas copy-on-write mode creates read-only arrays by default try: From 29c5224383a9d50a1d87a5737d8228ac8f3092d7 Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Thu, 24 Jul 2025 11:38:25 +0200 Subject: [PATCH 07/26] fix: bytes for categorical repr [test-upstream] [test-upstream] --- xarray/tests/test_dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 3e0734c8a1a..ed1a16b3dfb 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -299,7 +299,7 @@ def test_repr(self) -> None: var1 (dim1, dim2) float64 576B -0.9891 -0.3678 1.288 ... -0.2116 0.364 var2 (dim1, dim2) float64 576B 0.953 1.52 1.704 ... 0.1347 -0.6423 var3 (dim3, dim1) float64 640B 0.4107 0.9941 0.1665 ... 0.716 1.555 - var4 (dim1) category 32B b c b a c a c a{var5} + var4 (dim1) category 36B b c b a c a c a{var5} Attributes: foo: bar""" ) From ed86c095dde859b848f61a6b0fae7bc7edf98ad8 Mon Sep 17 00:00:00 2001 From: Ilan Gold Date: Thu, 24 Jul 2025 11:48:51 +0200 Subject: [PATCH 08/26] Update xarray/core/extension_array.py Co-authored-by: Deepak Cherian --- xarray/core/extension_array.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/extension_array.py b/xarray/core/extension_array.py index cb7499d91c3..7a51ba09084 100644 --- a/xarray/core/extension_array.py +++ b/xarray/core/extension_array.py @@ -102,7 +102,7 @@ def __post_init__(self): # duck array support internally via this class. if not is_allowed_extension_array(self.array): raise TypeError( - "`NumpyExtensionArray` or string dtype should be converted to a numpy array in `xarray` internally." 
+ f"{self.array.dtype!r} should be converted to a numpy array in `xarray` internally." ) def __array_function__(self, func, types, args, kwargs): From f603aa09ff5757927fcea2bb6fc342f43c04f87e Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Thu, 24 Jul 2025 11:50:22 +0200 Subject: [PATCH 09/26] fix: repr --- xarray/tests/test_dataset.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index ed1a16b3dfb..20c5d653187 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -13,6 +13,7 @@ import numpy as np import pandas as pd import pytest +from packaging.version import Version from pandas.core.indexes.datetimes import DatetimeIndex # remove once numpy 2.0 is the oldest supported version @@ -299,7 +300,7 @@ def test_repr(self) -> None: var1 (dim1, dim2) float64 576B -0.9891 -0.3678 1.288 ... -0.2116 0.364 var2 (dim1, dim2) float64 576B 0.953 1.52 1.704 ... 0.1347 -0.6423 var3 (dim3, dim1) float64 640B 0.4107 0.9941 0.1665 ... 0.716 1.555 - var4 (dim1) category 36B b c b a c a c a{var5} + var4 (dim1) category 3{6 if Version(pd.__version__) >= Version("3.0.0dev0") else 2}B b c b a c a c a{var5} Attributes: foo: bar""" ) From 02f2496f037b28731f29d0dd3f49d429cbf930cb Mon Sep 17 00:00:00 2001 From: Ilan Gold Date: Fri, 25 Jul 2025 15:25:31 +0200 Subject: [PATCH 10/26] Update variable.py --- xarray/core/variable.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index eff332539ab..9f2d00f879f 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -217,8 +217,7 @@ def _possibly_convert_objects(values): * pd.Timedelta """ as_series = pd.Series(values.ravel(), copy=False) - result = np.asarray(as_series).reshape(values.shape) - # FIXME: Why does pd.Series no longer preserve data type metadata for object dtype? 
+ # For why we need this behavior: https://github.com/pandas-dev/pandas/issues/61938 if ( result.dtype.kind == "O" and values.dtype.kind == "O" @@ -226,6 +225,8 @@ def _possibly_convert_objects(values): ): result = np.asarray(as_series, copy=True).reshape(values.shape) result.dtype = values.dtype + else: + result = np.asarray(as_series).reshape(values.shape) if not result.flags.writeable: # GH8843, pandas copy-on-write mode creates read-only arrays by default try: From 9ea9d8d7b576ba9867236cbbef5c8fe1a097ea08 Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Fri, 25 Jul 2025 17:55:25 +0200 Subject: [PATCH 11/26] ? mypy --- xarray/computation/ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/computation/ops.py b/xarray/computation/ops.py index 61834a85acf..70a7f916c8b 100644 --- a/xarray/computation/ops.py +++ b/xarray/computation/ops.py @@ -159,7 +159,7 @@ def fillna(data, other, join="left", dataset_join="left"): # Unsure why we get a mypy error here -def where_method(self, cond, other=dtypes.NA): # type: ignore[has-type] +def where_method(self, cond, other=dtypes.NA): """Return elements from `self` or `other` depending on `cond`. Parameters From 7dc9662d91e4add6167dc6002cdbf78be7474c2d Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Fri, 25 Jul 2025 17:57:04 +0200 Subject: [PATCH 12/26] fix: remove comment --- xarray/computation/ops.py | 1 - 1 file changed, 1 deletion(-) diff --git a/xarray/computation/ops.py b/xarray/computation/ops.py index 70a7f916c8b..8067ebb3a5f 100644 --- a/xarray/computation/ops.py +++ b/xarray/computation/ops.py @@ -158,7 +158,6 @@ def fillna(data, other, join="left", dataset_join="left"): ) -# Unsure why we get a mypy error here def where_method(self, cond, other=dtypes.NA): """Return elements from `self` or `other` depending on `cond`. 
From f05e703ca10f664daa17dcf359dc25ab53e274a2 Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Mon, 28 Jul 2025 15:20:56 +0200 Subject: [PATCH 13/26] try blanket ignore --- xarray/computation/ops.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/xarray/computation/ops.py b/xarray/computation/ops.py index 8067ebb3a5f..e91b5ef2164 100644 --- a/xarray/computation/ops.py +++ b/xarray/computation/ops.py @@ -158,7 +158,8 @@ def fillna(data, other, join="left", dataset_join="left"): ) -def where_method(self, cond, other=dtypes.NA): +# Unsure why we get a mypy error here, or why +def where_method(self, cond, other=dtypes.NA): # type: ignore # noqa: PGH003 """Return elements from `self` or `other` depending on `cond`. Parameters From 7343f6dd950cdd1acc25ae4b9e0308e77c6960e2 Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Mon, 28 Jul 2025 15:26:59 +0200 Subject: [PATCH 14/26] try blanket ignore --- xarray/computation/ops.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/xarray/computation/ops.py b/xarray/computation/ops.py index 8067ebb3a5f..e91b5ef2164 100644 --- a/xarray/computation/ops.py +++ b/xarray/computation/ops.py @@ -158,7 +158,8 @@ def fillna(data, other, join="left", dataset_join="left"): ) -def where_method(self, cond, other=dtypes.NA): +# Unsure why we get a mypy error here, or why +def where_method(self, cond, other=dtypes.NA): # type: ignore # noqa: PGH003 """Return elements from `self` or `other` depending on `cond`. 
Parameters From cc1776fb3382ac9a87fabcd1519030ee21185ac6 Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Mon, 28 Jul 2025 15:33:45 +0200 Subject: [PATCH 15/26] try blanket ignore again --- xarray/computation/ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/computation/ops.py b/xarray/computation/ops.py index e91b5ef2164..cae7cef841a 100644 --- a/xarray/computation/ops.py +++ b/xarray/computation/ops.py @@ -159,7 +159,7 @@ def fillna(data, other, join="left", dataset_join="left"): # Unsure why we get a mypy error here, or why -def where_method(self, cond, other=dtypes.NA): # type: ignore # noqa: PGH003 +def where_method(self, cond, other=dtypes.NA): # type: ignore[unused-ignore] """Return elements from `self` or `other` depending on `cond`. Parameters From 2e8ed6781a3e4d31d91e54b9fa151d4e4fae6d8c Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Mon, 28 Jul 2025 16:17:00 +0200 Subject: [PATCH 16/26] fix: mypy --- xarray/computation/ops.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/xarray/computation/ops.py b/xarray/computation/ops.py index cae7cef841a..fbc14040671 100644 --- a/xarray/computation/ops.py +++ b/xarray/computation/ops.py @@ -8,12 +8,18 @@ from __future__ import annotations import operator -from typing import Literal +from typing import TYPE_CHECKING, Literal import numpy as np from xarray.core import dtypes, duck_array_ops +if TYPE_CHECKING: + from typing import Any + + from xarray.core.dataarray import DataArray + from xarray.core.dataset import Dataset + try: import bottleneck as bn @@ -158,8 +164,7 @@ def fillna(data, other, join="left", dataset_join="left"): ) -# Unsure why we get a mypy error here, or why -def where_method(self, cond, other=dtypes.NA): # type: ignore[unused-ignore] +def where_method(self, cond: DataArray | Dataset, other: Any = dtypes.NA): """Return elements from `self` or `other` depending on `cond`. 
Parameters From ac77713f3b84349e2a382e58feb280cb71b7e89e Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Mon, 28 Jul 2025 16:21:06 +0200 Subject: [PATCH 17/26] use Any --- xarray/computation/ops.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/xarray/computation/ops.py b/xarray/computation/ops.py index fbc14040671..9c7fd96c12f 100644 --- a/xarray/computation/ops.py +++ b/xarray/computation/ops.py @@ -17,9 +17,6 @@ if TYPE_CHECKING: from typing import Any - from xarray.core.dataarray import DataArray - from xarray.core.dataset import Dataset - try: import bottleneck as bn @@ -164,7 +161,8 @@ def fillna(data, other, join="left", dataset_join="left"): ) -def where_method(self, cond: DataArray | Dataset, other: Any = dtypes.NA): +# TODO: type this properly +def where_method(self: Any, cond: Any, other: Any = dtypes.NA): """Return elements from `self` or `other` depending on `cond`. Parameters From bfbe24485fe5d59e2571d170614ff014f545a7f1 Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Tue, 29 Jul 2025 16:15:20 +0200 Subject: [PATCH 18/26] chore: add comment --- xarray/core/extension_array.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/xarray/core/extension_array.py b/xarray/core/extension_array.py index 7a51ba09084..134d3dd8cee 100644 --- a/xarray/core/extension_array.py +++ b/xarray/core/extension_array.py @@ -99,7 +99,8 @@ def __post_init__(self): raise TypeError(f"{self.array} is not an pandas ExtensionArray.") # This does not use the UNSUPPORTED_EXTENSION_ARRAY_TYPES whitelist because # we do support extension arrays from datetime, for example, that need - # duck array support internally via this class. + # duck array support internally via this class. These can appear from `DatetimeIndex` + # wrapped by `PandasIndex` internally, for example. if not is_allowed_extension_array(self.array): raise TypeError( f"{self.array.dtype!r} should be converted to a numpy array in `xarray` internally." 
From 594c164ef61378eb758ee8e5648bec8a9da0c120 Mon Sep 17 00:00:00 2001 From: Ilan Gold Date: Tue, 29 Jul 2025 16:17:10 +0200 Subject: [PATCH 19/26] Update xarray/core/utils.py Co-authored-by: Deepak Cherian --- xarray/core/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/utils.py b/xarray/core/utils.py index 2de57b6d213..386f1e346de 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -110,7 +110,7 @@ def is_allowed_extension_array_dtype(dtype: Any): ) -def is_allowed_extension_array(array: Any): +def is_allowed_extension_array(array: Any) -> bool: return ( hasattr(array, "dtype") and is_allowed_extension_array_dtype(array.dtype) From a436d787d226fa817eb22d8bdeaa5908052bf88b Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Tue, 29 Jul 2025 16:21:02 +0200 Subject: [PATCH 20/26] refactor: use is_allowed_extension_array more --- xarray/core/extension_array.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/core/extension_array.py b/xarray/core/extension_array.py index 134d3dd8cee..9262982d4cb 100644 --- a/xarray/core/extension_array.py +++ b/xarray/core/extension_array.py @@ -126,7 +126,7 @@ def replace_duck_with_extension_array(args) -> list: if func not in HANDLED_EXTENSION_ARRAY_FUNCTIONS: raise KeyError("Function not registered for pandas extension arrays.") res = HANDLED_EXTENSION_ARRAY_FUNCTIONS[func](*args, **kwargs) - if pd.api.types.is_extension_array_dtype(res): # noqa: TID251 + if is_allowed_extension_array(res): return PandasExtensionArray(res) return res @@ -135,7 +135,7 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): def __getitem__(self, key) -> PandasExtensionArray[T_ExtensionArray]: item = self.array[key] - if pd.api.types.is_extension_array_dtype(item): # noqa: TID251 + if is_allowed_extension_array(item): return PandasExtensionArray(item) if np.isscalar(item) or isinstance(key, int): return PandasExtensionArray(type(self.array)._from_sequence([item])) # type: 
ignore[call-arg,attr-defined,unused-ignore] From 5e603a793580ea086466a9cbd282732484c0a552 Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Tue, 29 Jul 2025 16:23:31 +0200 Subject: [PATCH 21/26] fix: remove one of the loops --- xarray/core/duck_array_ops.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index 7687e452496..b8a4011a72e 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -265,12 +265,12 @@ def asarray(data, xp=np, dtype=None): def as_shared_dtype(scalars_or_arrays, xp=None): """Cast arrays to a shared dtype using xarray's type promotion rules.""" - if any(pd.api.types.is_extension_array_dtype(x) for x in scalars_or_arrays): # noqa: TID251 - extension_array_types = [ - x.dtype - for x in scalars_or_arrays - if pd.api.types.is_extension_array_dtype(x) # noqa: TID251 - ] + extension_array_types = [ + x.dtype + for x in scalars_or_arrays + if pd.api.types.is_extension_array_dtype(x) # noqa: TID251 + ] + if len(extension_array_types) >= 1: non_nans = [x for x in scalars_or_arrays if not isna(x)] if len(extension_array_types) == len(non_nans) and all( isinstance(x, type(extension_array_types[0])) for x in extension_array_types From a8d1aa17189aa0193469dbe17fbd83419a638ac6 Mon Sep 17 00:00:00 2001 From: Ilan Gold Date: Tue, 29 Jul 2025 17:51:39 +0200 Subject: [PATCH 22/26] Update xarray/computation/ops.py Co-authored-by: Deepak Cherian --- xarray/computation/ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/computation/ops.py b/xarray/computation/ops.py index 9c7fd96c12f..49e7406f409 100644 --- a/xarray/computation/ops.py +++ b/xarray/computation/ops.py @@ -162,7 +162,7 @@ def fillna(data, other, join="left", dataset_join="left"): # TODO: type this properly -def where_method(self: Any, cond: Any, other: Any = dtypes.NA): +def where_method(self, cond, other=dtypes.NA): # type: ignore[has-type] """Return 
elements from `self` or `other` depending on `cond`. Parameters From bc27b15731265d000ca36fcc2a66a5b5aac55d0a Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 29 Jul 2025 15:52:00 +0000 Subject: [PATCH 23/26] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/computation/ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/computation/ops.py b/xarray/computation/ops.py index 49e7406f409..7d1d933af79 100644 --- a/xarray/computation/ops.py +++ b/xarray/computation/ops.py @@ -15,7 +15,7 @@ from xarray.core import dtypes, duck_array_ops if TYPE_CHECKING: - from typing import Any + pass try: import bottleneck as bn From f914dc701c9c7dea2055095e18f0d74373d5792b Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Tue, 29 Jul 2025 18:25:20 +0200 Subject: [PATCH 24/26] fix: result handling --- xarray/core/variable.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 9f2d00f879f..8ff155e8c87 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -218,15 +218,15 @@ def _possibly_convert_objects(values): """ as_series = pd.Series(values.ravel(), copy=False) # For why we need this behavior: https://github.com/pandas-dev/pandas/issues/61938 + result = np.asarray(as_series).reshape(values.shape) if ( result.dtype.kind == "O" and values.dtype.kind == "O" and Version(pd.__version__) >= Version("3.0.0dev0") ): + # need to copy to be able to override `dtype` result = np.asarray(as_series, copy=True).reshape(values.shape) result.dtype = values.dtype - else: - result = np.asarray(as_series).reshape(values.shape) if not result.flags.writeable: # GH8843, pandas copy-on-write mode creates read-only arrays by default try: From 8c2f73ec830aca9dda5db502bf3cd0d6f642a3dd Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Wed, 30 Jul 2025 16:47:39 +0200 
Subject: [PATCH 25/26] fix: avoid extra copy + comment --- xarray/core/variable.py | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 8ff155e8c87..031ca2c3fde 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -209,7 +209,10 @@ def _maybe_wrap_data(data): def _possibly_convert_objects(values): """Convert object arrays into datetime64 and timedelta64 according - to the pandas convention. + to the pandas convention. For backwards compat, as of 3.0.0 pandas, + object dtype inputs are cast to strings by `pandas.Series` + but we output them as object dtype with the input metadata preserved as well. + * datetime.datetime * datetime.timedelta @@ -217,22 +220,25 @@ def _possibly_convert_objects(values): * pd.Timedelta """ as_series = pd.Series(values.ravel(), copy=False) - # For why we need this behavior: https://github.com/pandas-dev/pandas/issues/61938 result = np.asarray(as_series).reshape(values.shape) + if not result.flags.writeable: + # GH8843, pandas copy-on-write mode creates read-only arrays by default + try: + result.flags.writeable = True + except ValueError: + result = result.copy() + # For why we need this behavior: https://github.com/pandas-dev/pandas/issues/61938 + # Object datatype inputs that are strings + # will be converted to strings by `pandas.Series`, and as of 3.0.0, lose + # metadata. If the roundtrip back to numpy in this function yields an + # object array again, the dtype.metadata will be preserved. 
if ( result.dtype.kind == "O" and values.dtype.kind == "O" and Version(pd.__version__) >= Version("3.0.0dev0") ): # need to copy to be able to override `dtype` - result = np.asarray(as_series, copy=True).reshape(values.shape) result.dtype = values.dtype - if not result.flags.writeable: - # GH8843, pandas copy-on-write mode creates read-only arrays by default - try: - result.flags.writeable = True - except ValueError: - result = result.copy() return result From 3cf3b884f15ec3c3ef191e49c48c4169135c87b2 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Wed, 30 Jul 2025 08:58:23 -0600 Subject: [PATCH 26/26] Apply suggestions from code review --- xarray/core/variable.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 031ca2c3fde..06d7218fe7c 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -230,14 +230,13 @@ def _possibly_convert_objects(values): # For why we need this behavior: https://github.com/pandas-dev/pandas/issues/61938 # Object datatype inputs that are strings # will be converted to strings by `pandas.Series`, and as of 3.0.0, lose - # metadata. If the roundtrip back to numpy in this function yields an + # `dtype.metadata`. If the roundtrip back to numpy in this function yields an # object array again, the dtype.metadata will be preserved. if ( result.dtype.kind == "O" and values.dtype.kind == "O" and Version(pd.__version__) >= Version("3.0.0dev0") ): - # need to copy to be able to override `dtype` result.dtype = values.dtype return result