Skip to content

Commit 83fd9ba

Browse files
TST (string dtype): remove usage of 'string[pyarrow_numpy]' alias (#59758)
1 parent b717abb commit 83fd9ba

29 files changed

+119
-134
lines changed

pandas/conftest.py

Lines changed: 28 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1272,6 +1272,34 @@ def string_dtype(request):
12721272
return request.param
12731273

12741274

1275+
@pytest.fixture(
1276+
params=[
1277+
("python", pd.NA),
1278+
pytest.param(("pyarrow", pd.NA), marks=td.skip_if_no("pyarrow")),
1279+
pytest.param(("pyarrow", np.nan), marks=td.skip_if_no("pyarrow")),
1280+
("python", np.nan),
1281+
],
1282+
ids=[
1283+
"string=string[python]",
1284+
"string=string[pyarrow]",
1285+
"string=str[pyarrow]",
1286+
"string=str[python]",
1287+
],
1288+
)
1289+
def string_dtype_no_object(request):
1290+
"""
1291+
Parametrized fixture for string dtypes.
1292+
* 'string[python]' (NA variant)
1293+
* 'string[pyarrow]' (NA variant)
1294+
* 'str' (NaN variant, with pyarrow)
1295+
* 'str' (NaN variant, without pyarrow)
1296+
"""
1297+
# need to instantiate the StringDtype here instead of in the params
1298+
# to avoid importing pyarrow during test collection
1299+
storage, na_value = request.param
1300+
return pd.StringDtype(storage, na_value)
1301+
1302+
12751303
@pytest.fixture(
12761304
params=[
12771305
"string[python]",

pandas/tests/apply/test_numba.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,7 @@
55

66
import pandas.util._test_decorators as td
77

8+
import pandas as pd
89
from pandas import (
910
DataFrame,
1011
Index,
@@ -29,11 +30,10 @@ def test_numba_vs_python_noop(float_frame, apply_axis):
2930

3031
def test_numba_vs_python_string_index():
3132
# GH#56189
32-
pytest.importorskip("pyarrow")
3333
df = DataFrame(
3434
1,
35-
index=Index(["a", "b"], dtype="string[pyarrow_numpy]"),
36-
columns=Index(["x", "y"], dtype="string[pyarrow_numpy]"),
35+
index=Index(["a", "b"], dtype=pd.StringDtype(na_value=np.nan)),
36+
columns=Index(["x", "y"], dtype=pd.StringDtype(na_value=np.nan)),
3737
)
3838
func = lambda x: x
3939
result = df.apply(func, engine="numba", axis=0)

pandas/tests/arrays/string_/test_string_arrow.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -241,10 +241,11 @@ def test_setitem_invalid_indexer_raises():
241241
arr[[0, 1]] = ["foo", "bar", "baz"]
242242

243243

244-
@pytest.mark.parametrize("dtype", ["string[pyarrow]", "string[pyarrow_numpy]"])
245-
def test_pickle_roundtrip(dtype):
244+
@pytest.mark.parametrize("na_value", [pd.NA, np.nan])
245+
def test_pickle_roundtrip(na_value):
246246
# GH 42600
247247
pytest.importorskip("pyarrow")
248+
dtype = StringDtype("pyarrow", na_value=na_value)
248249
expected = pd.Series(range(10), dtype=dtype)
249250
expected_sliced = expected.head(2)
250251
full_pickled = pickle.dumps(expected)

pandas/tests/base/test_misc.py

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -183,9 +183,7 @@ def test_access_by_position(index_flat):
183183
assert index[-1] == index[size - 1]
184184

185185
msg = f"index {size} is out of bounds for axis 0 with size {size}"
186-
if is_dtype_equal(index.dtype, "string[pyarrow]") or is_dtype_equal(
187-
index.dtype, "string[pyarrow_numpy]"
188-
):
186+
if isinstance(index.dtype, pd.StringDtype) and index.dtype.storage == "pyarrow":
189187
msg = "index out of bounds"
190188
with pytest.raises(IndexError, match=msg):
191189
index[size]

pandas/tests/frame/indexing/test_indexing.py

Lines changed: 3 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1864,13 +1864,11 @@ def test_adding_new_conditional_column() -> None:
18641864
("dtype", "infer_string"),
18651865
[
18661866
(object, False),
1867-
("string[pyarrow_numpy]", True),
1867+
(pd.StringDtype(na_value=np.nan), True),
18681868
],
18691869
)
18701870
def test_adding_new_conditional_column_with_string(dtype, infer_string) -> None:
18711871
# https://github.com/pandas-dev/pandas/issues/56204
1872-
pytest.importorskip("pyarrow")
1873-
18741872
df = DataFrame({"a": [1, 2], "b": [3, 4]})
18751873
with pd.option_context("future.infer_string", infer_string):
18761874
df.loc[df["a"] == 1, "c"] = "1"
@@ -1880,16 +1878,14 @@ def test_adding_new_conditional_column_with_string(dtype, infer_string) -> None:
18801878
tm.assert_frame_equal(df, expected)
18811879

18821880

1883-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
18841881
def test_add_new_column_infer_string():
18851882
# GH#55366
1886-
pytest.importorskip("pyarrow")
18871883
df = DataFrame({"x": [1]})
18881884
with pd.option_context("future.infer_string", True):
18891885
df.loc[df["x"] == 1, "y"] = "1"
18901886
expected = DataFrame(
1891-
{"x": [1], "y": Series(["1"], dtype="string[pyarrow_numpy]")},
1892-
columns=Index(["x", "y"], dtype=object),
1887+
{"x": [1], "y": Series(["1"], dtype=pd.StringDtype(na_value=np.nan))},
1888+
columns=Index(["x", "y"], dtype="str"),
18931889
)
18941890
tm.assert_frame_equal(df, expected)
18951891

pandas/tests/frame/methods/test_rank.py

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,7 @@
1414
)
1515
from pandas.compat import HAS_PYARROW
1616

17+
import pandas as pd
1718
from pandas import (
1819
DataFrame,
1920
Index,
@@ -502,14 +503,13 @@ def test_rank_mixed_axis_zero(self, data, expected):
502503
result = df.rank(numeric_only=True)
503504
tm.assert_frame_equal(result, expected)
504505

505-
@pytest.mark.parametrize(
506-
"dtype, exp_dtype",
507-
[("string[pyarrow]", "Int64"), ("string[pyarrow_numpy]", "float64")],
508-
)
509-
def test_rank_string_dtype(self, dtype, exp_dtype):
506+
def test_rank_string_dtype(self, string_dtype_no_object):
510507
# GH#55362
511-
pytest.importorskip("pyarrow")
512-
obj = Series(["foo", "foo", None, "foo"], dtype=dtype)
508+
obj = Series(["foo", "foo", None, "foo"], dtype=string_dtype_no_object)
513509
result = obj.rank(method="first")
510+
exp_dtype = "Int64" if string_dtype_no_object.na_value is pd.NA else "float64"
511+
if string_dtype_no_object.storage == "python":
512+
# TODO nullable string[python] should also return nullable Int64
513+
exp_dtype = "float64"
514514
expected = Series([1, 2, None, 3], dtype=exp_dtype)
515515
tm.assert_series_equal(result, expected)

pandas/tests/frame/test_constructors.py

Lines changed: 2 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -2655,8 +2655,7 @@ def test_construct_with_strings_and_none(self):
26552655

26562656
def test_frame_string_inference(self):
26572657
# GH#54430
2658-
pytest.importorskip("pyarrow")
2659-
dtype = "string[pyarrow_numpy]"
2658+
dtype = pd.StringDtype(na_value=np.nan)
26602659
expected = DataFrame(
26612660
{"a": ["a", "b"]}, dtype=dtype, columns=Index(["a"], dtype=dtype)
26622661
)
@@ -2690,8 +2689,7 @@ def test_frame_string_inference(self):
26902689

26912690
def test_frame_string_inference_array_string_dtype(self):
26922691
# GH#54496
2693-
pytest.importorskip("pyarrow")
2694-
dtype = "string[pyarrow_numpy]"
2692+
dtype = pd.StringDtype(na_value=np.nan)
26952693
expected = DataFrame(
26962694
{"a": ["a", "b"]}, dtype=dtype, columns=Index(["a"], dtype=dtype)
26972695
)
@@ -2715,7 +2713,6 @@ def test_frame_string_inference_array_string_dtype(self):
27152713

27162714
def test_frame_string_inference_block_dim(self):
27172715
# GH#55363
2718-
pytest.importorskip("pyarrow")
27192716
with pd.option_context("future.infer_string", True):
27202717
df = DataFrame(np.array([["hello", "goodbye"], ["hello", "Hello"]]))
27212718
assert df._mgr.blocks[0].ndim == 2

pandas/tests/groupby/methods/test_size.py

Lines changed: 2 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -3,8 +3,6 @@
33

44
from pandas._config import using_string_dtype
55

6-
import pandas.util._test_decorators as td
7-
86
from pandas import (
97
DataFrame,
108
Index,
@@ -79,16 +77,9 @@ def test_size_series_masked_type_returns_Int64(dtype):
7977

8078

8179
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
82-
@pytest.mark.parametrize(
83-
"dtype",
84-
[
85-
object,
86-
pytest.param("string[pyarrow_numpy]", marks=td.skip_if_no("pyarrow")),
87-
pytest.param("string[pyarrow]", marks=td.skip_if_no("pyarrow")),
88-
],
89-
)
90-
def test_size_strings(dtype):
80+
def test_size_strings(any_string_dtype):
9181
# GH#55627
82+
dtype = any_string_dtype
9283
df = DataFrame({"a": ["a", "a", "b"], "b": "a"}, dtype=dtype)
9384
result = df.groupby("a")["b"].size()
9485
exp_dtype = "Int64" if dtype == "string[pyarrow]" else "int64"

pandas/tests/groupby/methods/test_value_counts.py

Lines changed: 3 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -7,8 +7,6 @@
77
import numpy as np
88
import pytest
99

10-
import pandas.util._test_decorators as td
11-
1210
from pandas import (
1311
Categorical,
1412
CategoricalIndex,
@@ -373,14 +371,6 @@ def test_against_frame_and_seriesgroupby(
373371
tm.assert_frame_equal(result, expected)
374372

375373

376-
@pytest.mark.parametrize(
377-
"dtype",
378-
[
379-
object,
380-
pytest.param("string[pyarrow_numpy]", marks=td.skip_if_no("pyarrow")),
381-
pytest.param("string[pyarrow]", marks=td.skip_if_no("pyarrow")),
382-
],
383-
)
384374
@pytest.mark.parametrize("normalize", [True, False])
385375
@pytest.mark.parametrize(
386376
"sort, ascending, expected_rows, expected_count, expected_group_size",
@@ -398,9 +388,10 @@ def test_compound(
398388
expected_rows,
399389
expected_count,
400390
expected_group_size,
401-
dtype,
391+
any_string_dtype,
402392
using_infer_string,
403393
):
394+
dtype = any_string_dtype
404395
education_df = education_df.astype(dtype)
405396
education_df.columns = education_df.columns.astype(dtype)
406397
# Multiple groupby keys and as_index=False
@@ -417,6 +408,7 @@ def test_compound(
417408
expected["proportion"] = expected_count
418409
expected["proportion"] /= expected_group_size
419410
if dtype == "string[pyarrow]":
411+
# TODO(nullable) also string[python] should return nullable dtypes
420412
expected["proportion"] = expected["proportion"].convert_dtypes()
421413
else:
422414
expected["count"] = expected_count

pandas/tests/groupby/test_groupby.py

Lines changed: 2 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -2466,20 +2466,13 @@ def test_rolling_wrong_param_min_period():
24662466
test_df.groupby("name")["val"].rolling(window=2, min_period=1).sum()
24672467

24682468

2469-
@pytest.mark.parametrize(
2470-
"dtype",
2471-
[
2472-
object,
2473-
pytest.param("string[pyarrow_numpy]", marks=td.skip_if_no("pyarrow")),
2474-
],
2475-
)
2476-
def test_by_column_values_with_same_starting_value(dtype):
2469+
def test_by_column_values_with_same_starting_value(any_string_dtype):
24772470
# GH29635
24782471
df = DataFrame(
24792472
{
24802473
"Name": ["Thomas", "Thomas", "Thomas John"],
24812474
"Credit": [1200, 1300, 900],
2482-
"Mood": Series(["sad", "happy", "happy"], dtype=dtype),
2475+
"Mood": Series(["sad", "happy", "happy"], dtype=any_string_dtype),
24832476
}
24842477
)
24852478
aggregate_details = {"Mood": Series.mode, "Credit": "sum"}

0 commit comments

Comments
 (0)