From dcbacabafaef1d02afa63ee75babc7f08ab6af1c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= <6618166+twoertwein@users.noreply.github.com> Date: Thu, 4 Jan 2024 20:18:17 -0500 Subject: [PATCH 1/5] TYP: mostly DataFrame return overloads --- doc/source/user_guide/basics.rst | 10 +- doc/source/user_guide/indexing.rst | 2 +- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/core/base.py | 5 +- pandas/core/frame.py | 478 +++++++++++++++++++-- pandas/core/generic.py | 13 + pandas/core/reshape/encoding.py | 7 +- pandas/core/series.py | 81 +++- pandas/io/formats/info.py | 8 +- pandas/tests/apply/test_frame_apply.py | 2 +- pandas/tests/apply/test_str.py | 11 +- pandas/tests/frame/methods/test_asof.py | 6 +- pandas/tests/frame/methods/test_fillna.py | 2 +- pandas/tests/frame/test_reductions.py | 24 +- pandas/tests/reductions/test_reductions.py | 2 +- pandas/tests/series/test_api.py | 2 +- pandas/tests/test_multilevel.py | 2 +- 17 files changed, 579 insertions(+), 77 deletions(-) diff --git a/doc/source/user_guide/basics.rst b/doc/source/user_guide/basics.rst index f7d89110e6c8f..c416245e4f318 100644 --- a/doc/source/user_guide/basics.rst +++ b/doc/source/user_guide/basics.rst @@ -477,15 +477,15 @@ For example: .. ipython:: python df - df.mean(0) - df.mean(1) + df.mean(axis=0) + df.mean(axis=1) All such methods have a ``skipna`` option signaling whether to exclude missing data (``True`` by default): .. ipython:: python - df.sum(0, skipna=False) + df.sum(axis=0, skipna=False) df.sum(axis=1, skipna=True) Combined with the broadcasting / arithmetic behavior, one can describe various @@ -496,8 +496,8 @@ standard deviation of 1), very concisely: ts_stand = (df - df.mean()) / df.std() ts_stand.std() - xs_stand = df.sub(df.mean(1), axis=0).div(df.std(1), axis=0) - xs_stand.std(1) + xs_stand = df.sub(df.mean(axis=1), axis=0).div(df.std(axis=1), axis=0) + xs_stand.std(axis=1) Note that methods like :meth:`~DataFrame.cumsum` and :meth:`~DataFrame.cumprod` preserve the location of ``NaN`` values. This is somewhat different from diff --git a/doc/source/user_guide/indexing.rst b/doc/source/user_guide/indexing.rst index 7c8d3b9e1c869..ded5e1741a4f0 100644 --- a/doc/source/user_guide/indexing.rst +++ b/doc/source/user_guide/indexing.rst @@ -967,7 +967,7 @@ To select a row where each column meets its own criterion: values = {'ids': ['a', 'b'], 'ids2': ['a', 'c'], 'vals': [1, 3]} - row_mask = df.isin(values).all(1) + row_mask = df.isin(values).all(axis=1) df[row_mask] diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 25163a0f678b0..f15e0737866e9 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -95,6 +95,7 @@ Deprecations ~~~~~~~~~~~~ - Deprecated :meth:`Timestamp.utcfromtimestamp`, use ``Timestamp.fromtimestamp(ts, "UTC")`` instead (:issue:`56680`) - Deprecated :meth:`Timestamp.utcnow`, use ``Timestamp.now("UTC")`` instead (:issue:`56680`) +- Deprecated allowing non-keyword arguments in :meth:`DataFrame.all`, :meth:`DataFrame.min`, :meth:`DataFrame.max`, :meth:`DataFrame.sum`, :meth:`DataFrame.prod`, :meth:`DataFrame.mean`, :meth:`DataFrame.median`, :meth:`DataFrame.sem`, :meth:`DataFrame.var`, :meth:`DataFrame.std`, :meth:`DataFrame.skew`, :meth:`DataFrame.kurt`, :meth:`Series.all`, :meth:`Series.min`, :meth:`Series.max`, :meth:`Series.sum`, :meth:`Series.prod`, :meth:`Series.mean`, :meth:`Series.median`, :meth:`Series.sem`, :meth:`Series.var`, :meth:`Series.std`, :meth:`Series.skew`, and :meth:`Series.kurt` (:issue:`56739`) - .. --------------------------------------------------------------------------- diff --git a/pandas/core/base.py b/pandas/core/base.py index 490daa656f603..c9d4ead8bc10f 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -8,6 +8,7 @@ from typing import ( TYPE_CHECKING, Any, + Callable, Generic, Literal, cast, @@ -106,7 +107,7 @@ class PandasObject(DirNamesMixin): _cache: dict[str, Any] @property - def _constructor(self): + def _constructor(self) -> Callable[..., Self]: """ Class constructor (for this class it's just `__class__`). """ @@ -795,7 +796,7 @@ def argmin( # "int") return result # type: ignore[return-value] - def tolist(self): + def tolist(self) -> list: """ Return a list of the values. diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 207e3e7635cac..f026d426d4f7b 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -10803,7 +10803,7 @@ def round( """ from pandas.core.reshape.concat import concat - def _dict_round(df: DataFrame, decimals): + def _dict_round(df: DataFrame, decimals) -> Iterator[Series]: for col, vals in df.items(): try: yield _series_round(vals, decimals[col]) @@ -11232,7 +11232,7 @@ def c(x): # ---------------------------------------------------------------------- # ndarray-like stats methods - def count(self, axis: Axis = 0, numeric_only: bool = False): + def count(self, axis: Axis = 0, numeric_only: bool = False) -> Series: """ Count non-NA cells for each column or row. @@ -11475,9 +11475,42 @@ def _reduce_axis1(self, name: str, func, skipna: bool) -> Series: res_ser = self._constructor_sliced(result, index=self.index, copy=False) return res_ser - @doc(make_doc("any", ndim=2)) # error: Signature of "any" incompatible with supertype "NDFrame" - def any( # type: ignore[override] + @overload # type: ignore[override] + def any( + self, + *, + axis: Axis = ..., + bool_only: bool = ..., + skipna: bool = ..., + **kwargs, + ) -> Series: + ... + + @overload + def any( + self, + *, + axis: None, + bool_only: bool = ..., + skipna: bool = ..., + **kwargs, + ) -> bool: + ... + + @overload + def any( + self, + *, + axis: Axis | None, + bool_only: bool = ..., + skipna: bool = ..., + **kwargs, + ) -> Series | bool: + ... + + @doc(make_doc("any", ndim=2)) + def any( self, *, axis: Axis | None = 0, @@ -11492,6 +11525,40 @@ def any( # type: ignore[override] result = result.__finalize__(self, method="any") return result + @overload + def all( + self, + *, + axis: Axis = ..., + bool_only: bool = ..., + skipna: bool = ..., + **kwargs, + ) -> Series: + ... + + @overload + def all( + self, + *, + axis: None, + bool_only: bool = ..., + skipna: bool = ..., + **kwargs, + ) -> bool: + ... + + @overload + def all( + self, + *, + axis: Axis | None, + bool_only: bool = ..., + skipna: bool = ..., + **kwargs, + ) -> Series | bool: + ... + + @deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="all") @doc(make_doc("all", ndim=2)) def all( self, @@ -11507,6 +11574,41 @@ def all( result = result.__finalize__(self, method="all") return result + # error: Signature of "min" incompatible with supertype "NDFrame" + @overload # type: ignore[override] + def min( + self, + *, + axis: Axis = ..., + skipna: bool = ..., + numeric_only: bool = ..., + **kwargs, + ) -> Series: + ... + + @overload + def min( + self, + *, + axis: None, + skipna: bool = ..., + numeric_only: bool = ..., + **kwargs, + ) -> Any: + ... + + @overload + def min( + self, + *, + axis: Axis | None, + skipna: bool = ..., + numeric_only: bool = ..., + **kwargs, + ) -> Series | Any: + ... + + @deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="min") @doc(make_doc("min", ndim=2)) def min( self, @@ -11514,12 +11616,49 @@ def min( skipna: bool = True, numeric_only: bool = False, **kwargs, - ): - result = super().min(axis, skipna, numeric_only, **kwargs) + ) -> Series | Any: + result = super().min( + axis=axis, skipna=skipna, numeric_only=numeric_only, **kwargs + ) if isinstance(result, Series): result = result.__finalize__(self, method="min") return result + # error: Signature of "max" incompatible with supertype "NDFrame" + @overload # type: ignore[override] + def max( + self, + *, + axis: Axis = ..., + skipna: bool = ..., + numeric_only: bool = ..., + **kwargs, + ) -> Series: + ... + + @overload + def max( + self, + *, + axis: None, + skipna: bool = ..., + numeric_only: bool = ..., + **kwargs, + ) -> Any: + ... + + @overload + def max( + self, + *, + axis: Axis | None, + skipna: bool = ..., + numeric_only: bool = ..., + **kwargs, + ) -> Series | Any: + ... + + @deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="max") @doc(make_doc("max", ndim=2)) def max( self, @@ -11527,12 +11666,15 @@ def max( skipna: bool = True, numeric_only: bool = False, **kwargs, - ): - result = super().max(axis, skipna, numeric_only, **kwargs) + ) -> Series | Any: + result = super().max( + axis=axis, skipna=skipna, numeric_only=numeric_only, **kwargs + ) if isinstance(result, Series): result = result.__finalize__(self, method="max") return result + @deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="sum") @doc(make_doc("sum", ndim=2)) def sum( self, @@ -11541,10 +11683,17 @@ def sum( numeric_only: bool = False, min_count: int = 0, **kwargs, - ): - result = super().sum(axis, skipna, numeric_only, min_count, **kwargs) + ) -> Series: + result = super().sum( + axis=axis, + skipna=skipna, + numeric_only=numeric_only, + min_count=min_count, + **kwargs, + ) return result.__finalize__(self, method="sum") + @deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="prod") @doc(make_doc("prod", ndim=2)) def prod( self, @@ -11553,10 +11702,51 @@ def prod( numeric_only: bool = False, min_count: int = 0, **kwargs, - ): - result = super().prod(axis, skipna, numeric_only, min_count, **kwargs) + ) -> Series: + result = super().prod( + axis=axis, + skipna=skipna, + numeric_only=numeric_only, + min_count=min_count, + **kwargs, + ) return result.__finalize__(self, method="prod") + # error: Signature of "mean" incompatible with supertype "NDFrame" + @overload # type: ignore[override] + def mean( + self, + *, + axis: Axis = ..., + skipna: bool = ..., + numeric_only: bool = ..., + **kwargs, + ) -> Series: + ... + + @overload + def mean( + self, + *, + axis: None, + skipna: bool = ..., + numeric_only: bool = ..., + **kwargs, + ) -> Any: + ... + + @overload + def mean( + self, + *, + axis: Axis | None, + skipna: bool = ..., + numeric_only: bool = ..., + **kwargs, + ) -> Series | Any: + ... + + @deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="mean") @doc(make_doc("mean", ndim=2)) def mean( self, @@ -11564,12 +11754,49 @@ def mean( skipna: bool = True, numeric_only: bool = False, **kwargs, - ): - result = super().mean(axis, skipna, numeric_only, **kwargs) + ) -> Series | Any: + result = super().mean( + axis=axis, skipna=skipna, numeric_only=numeric_only, **kwargs + ) if isinstance(result, Series): result = result.__finalize__(self, method="mean") return result + # error: Signature of "median" incompatible with supertype "NDFrame" + @overload # type: ignore[override] + def median( + self, + *, + axis: Axis = ..., + skipna: bool = ..., + numeric_only: bool = ..., + **kwargs, + ) -> Series: + ... + + @overload + def median( + self, + *, + axis: None, + skipna: bool = ..., + numeric_only: bool = ..., + **kwargs, + ) -> Any: + ... + + @overload + def median( + self, + *, + axis: Axis | None, + skipna: bool = ..., + numeric_only: bool = ..., + **kwargs, + ) -> Series | Any: + ... + + @deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="median") @doc(make_doc("median", ndim=2)) def median( self, @@ -11577,12 +11804,52 @@ def median( skipna: bool = True, numeric_only: bool = False, **kwargs, - ): - result = super().median(axis, skipna, numeric_only, **kwargs) + ) -> Series | Any: + result = super().median( + axis=axis, skipna=skipna, numeric_only=numeric_only, **kwargs + ) if isinstance(result, Series): result = result.__finalize__(self, method="median") return result + # error: Signature of "sem" incompatible with supertype "NDFrame" + @overload # type: ignore[override] + def sem( + self, + *, + axis: Axis = ..., + skipna: bool = ..., + ddof: int = ..., + numeric_only: bool = ..., + **kwargs, + ) -> Series: + ... + + @overload + def sem( + self, + *, + axis: None, + skipna: bool = ..., + ddof: int = ..., + numeric_only: bool = ..., + **kwargs, + ) -> Any: + ... + + @overload + def sem( + self, + *, + axis: Axis | None, + skipna: bool = ..., + ddof: int = ..., + numeric_only: bool = ..., + **kwargs, + ) -> Series | Any: + ... + + @deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="sem") @doc(make_doc("sem", ndim=2)) def sem( self, @@ -11591,12 +11858,52 @@ def sem( ddof: int = 1, numeric_only: bool = False, **kwargs, - ): - result = super().sem(axis, skipna, ddof, numeric_only, **kwargs) + ) -> Series | Any: + result = super().sem( + axis=axis, skipna=skipna, ddof=ddof, numeric_only=numeric_only, **kwargs + ) if isinstance(result, Series): result = result.__finalize__(self, method="sem") return result + # error: Signature of "var" incompatible with supertype "NDFrame" + @overload # type: ignore[override] + def var( + self, + *, + axis: Axis = ..., + skipna: bool = ..., + ddof: int = ..., + numeric_only: bool = ..., + **kwargs, + ) -> Series: + ... + + @overload + def var( + self, + *, + axis: None, + skipna: bool = ..., + ddof: int = ..., + numeric_only: bool = ..., + **kwargs, + ) -> Any: + ... + + @overload + def var( + self, + *, + axis: Axis | None, + skipna: bool = ..., + ddof: int = ..., + numeric_only: bool = ..., + **kwargs, + ) -> Series | Any: + ... + + @deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="var") @doc(make_doc("var", ndim=2)) def var( self, @@ -11605,12 +11912,52 @@ def var( ddof: int = 1, numeric_only: bool = False, **kwargs, - ): - result = super().var(axis, skipna, ddof, numeric_only, **kwargs) + ) -> Series | Any: + result = super().var( + axis=axis, skipna=skipna, ddof=ddof, numeric_only=numeric_only, **kwargs + ) if isinstance(result, Series): result = result.__finalize__(self, method="var") return result + # error: Signature of "std" incompatible with supertype "NDFrame" + @overload # type: ignore[override] + def std( + self, + *, + axis: Axis = ..., + skipna: bool = ..., + ddof: int = ..., + numeric_only: bool = ..., + **kwargs, + ) -> Series: + ... + + @overload + def std( + self, + *, + axis: None, + skipna: bool = ..., + ddof: int = ..., + numeric_only: bool = ..., + **kwargs, + ) -> Any: + ... + + @overload + def std( + self, + *, + axis: Axis | None, + skipna: bool = ..., + ddof: int = ..., + numeric_only: bool = ..., + **kwargs, + ) -> Series | Any: + ... + + @deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="std") @doc(make_doc("std", ndim=2)) def std( self, @@ -11619,12 +11966,49 @@ def std( ddof: int = 1, numeric_only: bool = False, **kwargs, - ): - result = super().std(axis, skipna, ddof, numeric_only, **kwargs) + ) -> Series | Any: + result = super().std( + axis=axis, skipna=skipna, ddof=ddof, numeric_only=numeric_only, **kwargs + ) if isinstance(result, Series): result = result.__finalize__(self, method="std") return result + # error: Signature of "skew" incompatible with supertype "NDFrame" + @overload # type: ignore[override] + def skew( + self, + *, + axis: Axis = ..., + skipna: bool = ..., + numeric_only: bool = ..., + **kwargs, + ) -> Series: + ... + + @overload + def skew( + self, + *, + axis: None, + skipna: bool = ..., + numeric_only: bool = ..., + **kwargs, + ) -> Any: + ... + + @overload + def skew( + self, + *, + axis: Axis | None, + skipna: bool = ..., + numeric_only: bool = ..., + **kwargs, + ) -> Series | Any: + ... + + @deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="skew") @doc(make_doc("skew", ndim=2)) def skew( self, @@ -11632,12 +12016,49 @@ def skew( skipna: bool = True, numeric_only: bool = False, **kwargs, - ): - result = super().skew(axis, skipna, numeric_only, **kwargs) + ) -> Series | Any: + result = super().skew( + axis=axis, skipna=skipna, numeric_only=numeric_only, **kwargs + ) if isinstance(result, Series): result = result.__finalize__(self, method="skew") return result + # error: Signature of "kurt" incompatible with supertype "NDFrame" + @overload # type: ignore[override] + def kurt( + self, + *, + axis: Axis = ..., + skipna: bool = ..., + numeric_only: bool = ..., + **kwargs, + ) -> Series: + ... + + @overload + def kurt( + self, + *, + axis: None, + skipna: bool = ..., + numeric_only: bool = ..., + **kwargs, + ) -> Any: + ... + + @overload + def kurt( + self, + *, + axis: Axis | None, + skipna: bool = ..., + numeric_only: bool = ..., + **kwargs, + ) -> Series | Any: + ... + + @deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="kurt") @doc(make_doc("kurt", ndim=2)) def kurt( self, @@ -11645,13 +12066,16 @@ def kurt( skipna: bool = True, numeric_only: bool = False, **kwargs, - ): - result = super().kurt(axis, skipna, numeric_only, **kwargs) + ) -> Series | Any: + result = super().kurt( + axis=axis, skipna=skipna, numeric_only=numeric_only, **kwargs + ) if isinstance(result, Series): result = result.__finalize__(self, method="kurt") return result - kurtosis = kurt + # error: Incompatible types in assignment + kurtosis = kurt # type: ignore[assignment] product = prod @doc(make_doc("cummin", ndim=2)) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 78fbb66635dd1..4871d98ffc160 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -12162,6 +12162,7 @@ def _logical_func( def any( self, + *, axis: Axis | None = 0, bool_only: bool_t = False, skipna: bool_t = True, @@ -12173,6 +12174,7 @@ def any( def all( self, + *, axis: Axis = 0, bool_only: bool_t = False, skipna: bool_t = True, @@ -12276,6 +12278,7 @@ def _stat_function_ddof( def sem( self, + *, axis: Axis | None = 0, skipna: bool_t = True, ddof: int = 1, @@ -12288,6 +12291,7 @@ def sem( def var( self, + *, axis: Axis | None = 0, skipna: bool_t = True, ddof: int = 1, @@ -12300,6 +12304,7 @@ def var( def std( self, + *, axis: Axis | None = 0, skipna: bool_t = True, ddof: int = 1, @@ -12331,6 +12336,7 @@ def _stat_function( def min( self, + *, axis: Axis | None = 0, skipna: bool_t = True, numeric_only: bool_t = False, @@ -12347,6 +12353,7 @@ def min( def max( self, + *, axis: Axis | None = 0, skipna: bool_t = True, numeric_only: bool_t = False, @@ -12363,6 +12370,7 @@ def max( def mean( self, + *, axis: Axis | None = 0, skipna: bool_t = True, numeric_only: bool_t = False, @@ -12374,6 +12382,7 @@ def mean( def median( self, + *, axis: Axis | None = 0, skipna: bool_t = True, numeric_only: bool_t = False, @@ -12385,6 +12394,7 @@ def median( def skew( self, + *, axis: Axis | None = 0, skipna: bool_t = True, numeric_only: bool_t = False, @@ -12396,6 +12406,7 @@ def skew( def kurt( self, + *, axis: Axis | None = 0, skipna: bool_t = True, numeric_only: bool_t = False, @@ -12448,6 +12459,7 @@ def _min_count_stat_function( def sum( self, + *, axis: Axis | None = 0, skipna: bool_t = True, numeric_only: bool_t = False, @@ -12460,6 +12472,7 @@ def sum( def prod( self, + *, axis: Axis | None = 0, skipna: bool_t = True, numeric_only: bool_t = False, diff --git a/pandas/core/reshape/encoding.py b/pandas/core/reshape/encoding.py index 2c74538175a58..77d2b732cc4af 100644 --- a/pandas/core/reshape/encoding.py +++ b/pandas/core/reshape/encoding.py @@ -6,10 +6,7 @@ Iterable, ) import itertools -from typing import ( - TYPE_CHECKING, - cast, -) +from typing import TYPE_CHECKING import numpy as np @@ -482,7 +479,7 @@ def from_dummies( f"Received 'data' of type: {type(data).__name__}" ) - col_isna_mask = cast(Series, data.isna().any()) + col_isna_mask = data.isna().any() if col_isna_mask.any(): raise ValueError( diff --git a/pandas/core/series.py b/pandas/core/series.py index 657b384c57235..3a7e8de2b343b 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -6454,6 +6454,7 @@ def any( # type: ignore[override] filter_type="bool", ) + @deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="all") @Appender(make_doc("all", ndim=1)) def all( self, @@ -6473,6 +6474,7 @@ def all( filter_type="bool", ) + @deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="min") @doc(make_doc("min", ndim=1)) def min( self, @@ -6481,8 +6483,11 @@ def min( numeric_only: bool = False, **kwargs, ): - return NDFrame.min(self, axis, skipna, numeric_only, **kwargs) + return NDFrame.min( + self, axis=axis, skipna=skipna, numeric_only=numeric_only, **kwargs + ) + @deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="max") @doc(make_doc("max", ndim=1)) def max( self, @@ -6491,8 +6496,11 @@ def max( numeric_only: bool = False, **kwargs, ): - return NDFrame.max(self, axis, skipna, numeric_only, **kwargs) + return NDFrame.max( + self, axis=axis, skipna=skipna, numeric_only=numeric_only, **kwargs + ) + @deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="sum") @doc(make_doc("sum", ndim=1)) def sum( self, @@ -6502,8 +6510,16 @@ def sum( min_count: int = 0, **kwargs, ): - return NDFrame.sum(self, axis, skipna, numeric_only, min_count, **kwargs) + return NDFrame.sum( + self, + axis=axis, + skipna=skipna, + numeric_only=numeric_only, + min_count=min_count, + **kwargs, + ) + @deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="prod") @doc(make_doc("prod", ndim=1)) def prod( self, @@ -6513,8 +6529,16 @@ def prod( min_count: int = 0, **kwargs, ): - return NDFrame.prod(self, axis, skipna, numeric_only, min_count, **kwargs) + return NDFrame.prod( + self, + axis=axis, + skipna=skipna, + numeric_only=numeric_only, + min_count=min_count, + **kwargs, + ) + @deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="mean") @doc(make_doc("mean", ndim=1)) def mean( self, @@ -6523,8 +6547,11 @@ def mean( numeric_only: bool = False, **kwargs, ): - return NDFrame.mean(self, axis, skipna, numeric_only, **kwargs) + return NDFrame.mean( + self, axis=axis, skipna=skipna, numeric_only=numeric_only, **kwargs + ) + @deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="median") @doc(make_doc("median", ndim=1)) def median( self, @@ -6533,8 +6560,11 @@ def median( numeric_only: bool = False, **kwargs, ): - return NDFrame.median(self, axis, skipna, numeric_only, **kwargs) + return NDFrame.median( + self, axis=axis, skipna=skipna, numeric_only=numeric_only, **kwargs + ) + @deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="sem") @doc(make_doc("sem", ndim=1)) def sem( self, @@ -6544,8 +6574,16 @@ def sem( numeric_only: bool = False, **kwargs, ): - return NDFrame.sem(self, axis, skipna, ddof, numeric_only, **kwargs) + return NDFrame.sem( + self, + axis=axis, + skipna=skipna, + ddof=ddof, + numeric_only=numeric_only, + **kwargs, + ) + @deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="var") @doc(make_doc("var", ndim=1)) def var( self, @@ -6555,8 +6593,16 @@ def var( numeric_only: bool = False, **kwargs, ): - return NDFrame.var(self, axis, skipna, ddof, numeric_only, **kwargs) + return NDFrame.var( + self, + axis=axis, + skipna=skipna, + ddof=ddof, + numeric_only=numeric_only, + **kwargs, + ) + @deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="std") @doc(make_doc("std", ndim=1)) def std( self, @@ -6566,8 +6612,16 @@ def std( numeric_only: bool = False, **kwargs, ): - return NDFrame.std(self, axis, skipna, ddof, numeric_only, **kwargs) + return NDFrame.std( + self, + axis=axis, + skipna=skipna, + ddof=ddof, + numeric_only=numeric_only, + **kwargs, + ) + @deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="skew") @doc(make_doc("skew", ndim=1)) def skew( self, @@ -6576,8 +6630,11 @@ def skew( numeric_only: bool = False, **kwargs, ): - return NDFrame.skew(self, axis, skipna, numeric_only, **kwargs) + return NDFrame.skew( + self, axis=axis, skipna=skipna, numeric_only=numeric_only, **kwargs + ) + @deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="kurt") @doc(make_doc("kurt", ndim=1)) def kurt( self, @@ -6586,7 +6643,9 @@ def kurt( numeric_only: bool = False, **kwargs, ): - return NDFrame.kurt(self, axis, skipna, numeric_only, **kwargs) + return NDFrame.kurt( + self, axis=axis, skipna=skipna, numeric_only=numeric_only, **kwargs + ) kurtosis = kurt product = prod diff --git a/pandas/io/formats/info.py b/pandas/io/formats/info.py index 552affbd053f2..2d28b032ca49d 100644 --- a/pandas/io/formats/info.py +++ b/pandas/io/formats/info.py @@ -392,7 +392,7 @@ def dtype_counts(self) -> Mapping[str, int]: @property @abstractmethod - def non_null_counts(self) -> Sequence[int]: + def non_null_counts(self) -> list[int] | Series: """Sequence of non-null counts for all columns or column (if series).""" @property @@ -486,7 +486,7 @@ def col_count(self) -> int: return len(self.ids) @property - def non_null_counts(self) -> Sequence[int]: + def non_null_counts(self) -> Series: """Sequence of non-null counts for all columns or column (if series).""" return self.data.count() @@ -546,7 +546,7 @@ def render( printer.to_buffer(buf) @property - def non_null_counts(self) -> Sequence[int]: + def non_null_counts(self) -> list[int]: return [self.data.count()] @property @@ -750,7 +750,7 @@ def memory_usage_string(self) -> str: return self.info.memory_usage_string @property - def non_null_counts(self) -> Sequence[int]: + def non_null_counts(self) -> list[int] | Series: return self.info.non_null_counts def add_object_type_line(self) -> None: diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py index 66a43f2ba4bcd..8eae2c42e2dd5 100644 --- a/pandas/tests/apply/test_frame_apply.py +++ b/pandas/tests/apply/test_frame_apply.py @@ -402,7 +402,7 @@ def test_apply_yield_list(float_frame): def test_apply_reduce_Series(float_frame): float_frame.iloc[::2, float_frame.columns.get_loc("A")] = np.nan - expected = float_frame.mean(1) + expected = float_frame.mean(axis=1) result = float_frame.apply(np.mean, axis=1) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/apply/test_str.py b/pandas/tests/apply/test_str.py index e9192dae66a46..0bedfc7e8e1e1 100644 --- a/pandas/tests/apply/test_str.py +++ b/pandas/tests/apply/test_str.py @@ -38,8 +38,15 @@ def test_apply_with_string_funcs(request, float_frame, func, args, kwds, how): "argument to func", ) ) - result = getattr(float_frame, how)(func, *args, **kwds) - expected = getattr(float_frame, func)(*args, **kwds) + if args: + with pytest.warns( + FutureWarning, match="Starting with pandas version 3.0 all arguments of" + ): + result = getattr(float_frame, how)(func, *args, **kwds) + expected = getattr(float_frame, func)(*args, **kwds) + else: + result = getattr(float_frame, how)(func, *args, **kwds) + expected = getattr(float_frame, func)(*args, **kwds) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_asof.py b/pandas/tests/frame/methods/test_asof.py index 4a8adf89b3aef..bed126d25eeda 100644 --- a/pandas/tests/frame/methods/test_asof.py +++ b/pandas/tests/frame/methods/test_asof.py @@ -36,18 +36,18 @@ def test_basic(self, date_range_frame): dates = date_range("1/1/1990", periods=N * 3, freq="25s") result = df.asof(dates) - assert result.notna().all(1).all() + assert result.notna().all(axis=None) lb = df.index[14] ub = df.index[30] dates = list(dates) result = df.asof(dates) - assert result.notna().all(1).all() + assert result.notna().all(axis=None) mask = (result.index >= lb) & (result.index < ub) rs = result[mask] - assert (rs == 14).all(1).all() + assert (rs == 14).all(axis=None) def test_subset(self, date_range_frame): N = 10 diff --git a/pandas/tests/frame/methods/test_fillna.py b/pandas/tests/frame/methods/test_fillna.py index 4f661b14ef201..fea5ab350bbc9 100644 --- a/pandas/tests/frame/methods/test_fillna.py +++ b/pandas/tests/frame/methods/test_fillna.py @@ -576,7 +576,7 @@ def test_fillna_dict_series(self): # disable this for now with pytest.raises(NotImplementedError, match="column by column"): - df.fillna(df.max(1), axis=1) + df.fillna(df.max(axis=1), axis=1) def test_fillna_dataframe(self): # GH#8377 diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py index 63c15fab76562..d3934c73c4769 100644 --- a/pandas/tests/frame/test_reductions.py +++ b/pandas/tests/frame/test_reductions.py @@ -773,7 +773,7 @@ def test_operators_timedelta64(self): tm.assert_series_equal(result, expected) # works when only those columns are selected - result = mixed[["A", "B"]].min(1) + result = mixed[["A", "B"]].min(axis=1) expected = Series([timedelta(days=-1)] * 3) tm.assert_series_equal(result, expected) @@ -832,8 +832,8 @@ def test_std_datetime64_with_nat(self, values, skipna, request, unit): def test_sum_corner(self): empty_frame = DataFrame() - axis0 = empty_frame.sum(0) - axis1 = empty_frame.sum(1) + axis0 = empty_frame.sum(axis=0) + axis1 = empty_frame.sum(axis=1) assert isinstance(axis0, Series) assert isinstance(axis1, Series) assert len(axis0) == 0 @@ -967,8 +967,8 @@ def test_sum_object(self, float_frame): def test_sum_bool(self, float_frame): # ensure this works, bug report bools = np.isnan(float_frame) - bools.sum(1) - bools.sum(0) + bools.sum(axis=1) + bools.sum(axis=0) def test_sum_mixed_datetime(self): # GH#30886 @@ -990,7 +990,7 @@ def test_mean_corner(self, float_frame, float_string_frame): # take mean of boolean column float_frame["bool"] = float_frame["A"] > 0 - means = float_frame.mean(0) + means = float_frame.mean(axis=0) assert means["bool"] == float_frame["bool"].values.mean() def test_mean_datetimelike(self): @@ -1043,13 +1043,13 @@ def test_mean_extensionarray_numeric_only_true(self): def test_stats_mixed_type(self, float_string_frame): with pytest.raises(TypeError, match="could not convert"): - float_string_frame.std(1) + float_string_frame.std(axis=1) with pytest.raises(TypeError, match="could not convert"): - float_string_frame.var(1) + float_string_frame.var(axis=1) with pytest.raises(TypeError, match="unsupported operand type"): - float_string_frame.mean(1) + float_string_frame.mean(axis=1) with pytest.raises(TypeError, match="could not convert"): - float_string_frame.skew(1) + float_string_frame.skew(axis=1) def test_sum_bools(self): df = DataFrame(index=range(1), columns=range(10)) @@ -1329,11 +1329,11 @@ def test_any_all_extra(self): result = df[["A", "B"]].any(axis=1, bool_only=True) tm.assert_series_equal(result, expected) - result = df.all(1) + result = df.all(axis=1) expected = Series([True, False, False], index=["a", "b", "c"]) tm.assert_series_equal(result, expected) - result = df.all(1, bool_only=True) + result = df.all(axis=1, bool_only=True) tm.assert_series_equal(result, expected) # Axis is None diff --git a/pandas/tests/reductions/test_reductions.py b/pandas/tests/reductions/test_reductions.py index 20749c7ed90e8..3e402255c3a6f 100644 --- a/pandas/tests/reductions/test_reductions.py +++ b/pandas/tests/reductions/test_reductions.py @@ -686,7 +686,7 @@ def test_empty(self, method, unit, use_bottleneck, dtype): # GH#844 (changed in GH#9422) df = DataFrame(np.empty((10, 0)), dtype=dtype) - assert (getattr(df, method)(1) == unit).all() + assert (getattr(df, method)(axis=1) == unit).all() s = Series([1], dtype=dtype) result = getattr(s, method)(min_count=2) diff --git a/pandas/tests/series/test_api.py b/pandas/tests/series/test_api.py index 29d6e2036476e..c27252dd4d6c6 100644 --- a/pandas/tests/series/test_api.py +++ b/pandas/tests/series/test_api.py @@ -107,7 +107,7 @@ def test_contains(self, datetime_series): def test_axis_alias(self): s = Series([1, 2, np.nan]) tm.assert_series_equal(s.dropna(axis="rows"), s.dropna(axis="index")) - assert s.dropna().sum("rows") == 3 + assert s.dropna().sum(axis="rows") == 3 assert s._get_axis_number("rows") == 0 assert s._get_axis_name("rows") == "index" diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index fda51b157cd75..e25077f6a3ad6 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -141,7 +141,7 @@ def test_multilevel_consolidate(self): df = DataFrame( np.random.default_rng(2).standard_normal((4, 4)), index=index, columns=index ) - df["Totals", ""] = df.sum(1) + df["Totals", ""] = df.sum(axis=1) df = df._consolidate() def test_level_with_tuples(self): From 9de4f705f4ea170ae5aca64fddc704ef51db4905 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Thu, 8 Feb 2024 20:38:02 -0500 Subject: [PATCH 2/5] remove keyword-only enforcement --- doc/source/user_guide/basics.rst | 10 ++++----- doc/source/user_guide/indexing.rst | 2 +- doc/source/whatsnew/v3.0.0.rst | 1 - pandas/core/frame.py | 12 ----------- pandas/core/series.py | 8 -------- pandas/tests/apply/test_frame_apply.py | 2 +- pandas/tests/apply/test_str.py | 11 ++-------- pandas/tests/frame/methods/test_asof.py | 6 +++--- pandas/tests/frame/methods/test_fillna.py | 2 +- pandas/tests/frame/test_reductions.py | 24 +++++++++++----------- pandas/tests/reductions/test_reductions.py | 2 +- pandas/tests/series/test_api.py | 2 +- pandas/tests/test_multilevel.py | 2 +- 13 files changed, 28 insertions(+), 56 deletions(-) diff --git a/doc/source/user_guide/basics.rst b/doc/source/user_guide/basics.rst index c416245e4f318..f7d89110e6c8f 100644 --- a/doc/source/user_guide/basics.rst +++ b/doc/source/user_guide/basics.rst @@ -477,15 +477,15 @@ For example: .. ipython:: python df - df.mean(axis=0) - df.mean(axis=1) + df.mean(0) + df.mean(1) All such methods have a ``skipna`` option signaling whether to exclude missing data (``True`` by default): .. ipython:: python - df.sum(axis=0, skipna=False) + df.sum(0, skipna=False) df.sum(axis=1, skipna=True) Combined with the broadcasting / arithmetic behavior, one can describe various @@ -496,8 +496,8 @@ standard deviation of 1), very concisely: ts_stand = (df - df.mean()) / df.std() ts_stand.std() - xs_stand = df.sub(df.mean(axis=1), axis=0).div(df.std(axis=1), axis=0) - xs_stand.std(axis=1) + xs_stand = df.sub(df.mean(1), axis=0).div(df.std(1), axis=0) + xs_stand.std(1) Note that methods like :meth:`~DataFrame.cumsum` and :meth:`~DataFrame.cumprod` preserve the location of ``NaN`` values. This is somewhat different from diff --git a/doc/source/user_guide/indexing.rst b/doc/source/user_guide/indexing.rst index ded5e1741a4f0..7c8d3b9e1c869 100644 --- a/doc/source/user_guide/indexing.rst +++ b/doc/source/user_guide/indexing.rst @@ -967,7 +967,7 @@ To select a row where each column meets its own criterion: values = {'ids': ['a', 'b'], 'ids2': ['a', 'c'], 'vals': [1, 3]} - row_mask = df.isin(values).all(axis=1) + row_mask = df.isin(values).all(1) df[row_mask] diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index f15e0737866e9..25163a0f678b0 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -95,7 +95,6 @@ Deprecations ~~~~~~~~~~~~ - Deprecated :meth:`Timestamp.utcfromtimestamp`, use ``Timestamp.fromtimestamp(ts, "UTC")`` instead (:issue:`56680`) - Deprecated :meth:`Timestamp.utcnow`, use ``Timestamp.now("UTC")`` instead (:issue:`56680`) -- Deprecated allowing non-keyword arguments in :meth:`DataFrame.all`, :meth:`DataFrame.min`, :meth:`DataFrame.max`, :meth:`DataFrame.sum`, :meth:`DataFrame.prod`, :meth:`DataFrame.mean`, :meth:`DataFrame.median`, :meth:`DataFrame.sem`, :meth:`DataFrame.var`, :meth:`DataFrame.std`, :meth:`DataFrame.skew`, :meth:`DataFrame.kurt`, :meth:`Series.all`, :meth:`Series.min`, :meth:`Series.max`, :meth:`Series.sum`, :meth:`Series.prod`, :meth:`Series.mean`, :meth:`Series.median`, :meth:`Series.sem`, :meth:`Series.var`, :meth:`Series.std`, :meth:`Series.skew`, and :meth:`Series.kurt` (:issue:`56739`) - .. --------------------------------------------------------------------------- diff --git a/pandas/core/frame.py b/pandas/core/frame.py index f026d426d4f7b..9df00c7ae3ba3 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -11558,7 +11558,6 @@ def all( ) -> Series | bool: ... - @deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="all") @doc(make_doc("all", ndim=2)) def all( self, @@ -11608,7 +11607,6 @@ def min( ) -> Series | Any: ... - @deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="min") @doc(make_doc("min", ndim=2)) def min( self, @@ -11658,7 +11656,6 @@ def max( ) -> Series | Any: ... - @deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="max") @doc(make_doc("max", ndim=2)) def max( self, @@ -11674,7 +11671,6 @@ def max( result = result.__finalize__(self, method="max") return result - @deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="sum") @doc(make_doc("sum", ndim=2)) def sum( self, @@ -11693,7 +11689,6 @@ def sum( ) return result.__finalize__(self, method="sum") - @deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="prod") @doc(make_doc("prod", ndim=2)) def prod( self, @@ -11746,7 +11741,6 @@ def mean( ) -> Series | Any: ... - @deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="mean") @doc(make_doc("mean", ndim=2)) def mean( self, @@ -11796,7 +11790,6 @@ def median( ) -> Series | Any: ... - @deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="median") @doc(make_doc("median", ndim=2)) def median( self, @@ -11849,7 +11842,6 @@ def sem( ) -> Series | Any: ... - @deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="sem") @doc(make_doc("sem", ndim=2)) def sem( self, @@ -11903,7 +11895,6 @@ def var( ) -> Series | Any: ... - @deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="var") @doc(make_doc("var", ndim=2)) def var( self, @@ -11957,7 +11948,6 @@ def std( ) -> Series | Any: ... - @deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="std") @doc(make_doc("std", ndim=2)) def std( self, @@ -12008,7 +11998,6 @@ def skew( ) -> Series | Any: ... - @deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="skew") @doc(make_doc("skew", ndim=2)) def skew( self, @@ -12058,7 +12047,6 @@ def kurt( ) -> Series | Any: ... - @deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="kurt") @doc(make_doc("kurt", ndim=2)) def kurt( self, diff --git a/pandas/core/series.py b/pandas/core/series.py index 3a7e8de2b343b..83c024962bc8c 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -6454,7 +6454,6 @@ def any( # type: ignore[override] filter_type="bool", ) - @deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="all") @Appender(make_doc("all", ndim=1)) def all( self, @@ -6474,7 +6473,6 @@ def all( filter_type="bool", ) - @deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="min") @doc(make_doc("min", ndim=1)) def min( self, @@ -6487,7 +6485,6 @@ def min( self, axis=axis, skipna=skipna, numeric_only=numeric_only, **kwargs ) - @deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="max") @doc(make_doc("max", ndim=1)) def max( self, @@ -6500,7 +6497,6 @@ def max( self, axis=axis, skipna=skipna, numeric_only=numeric_only, **kwargs ) - @deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="sum") @doc(make_doc("sum", ndim=1)) def sum( self, @@ -6519,7 +6515,6 @@ def sum( **kwargs, ) - @deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="prod") @doc(make_doc("prod", ndim=1)) def prod( self, @@ -6538,7 +6533,6 @@ def prod( **kwargs, ) - @deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="mean") @doc(make_doc("mean", ndim=1)) def mean( self, @@ -6551,7 +6545,6 @@ def mean( self, axis=axis, skipna=skipna, numeric_only=numeric_only, **kwargs ) - @deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="median") @doc(make_doc("median", ndim=1)) def median( self, @@ -6564,7 +6557,6 @@ def median( self, axis=axis, skipna=skipna, numeric_only=numeric_only, **kwargs ) - @deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="sem") @doc(make_doc("sem", ndim=1)) def sem( self, diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py index 8eae2c42e2dd5..66a43f2ba4bcd 100644 --- a/pandas/tests/apply/test_frame_apply.py +++ b/pandas/tests/apply/test_frame_apply.py @@ -402,7 +402,7 @@ def test_apply_yield_list(float_frame): def test_apply_reduce_Series(float_frame): float_frame.iloc[::2, float_frame.columns.get_loc("A")] = np.nan - expected = float_frame.mean(axis=1) + expected = float_frame.mean(1) result = float_frame.apply(np.mean, axis=1) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/apply/test_str.py b/pandas/tests/apply/test_str.py index 0bedfc7e8e1e1..e9192dae66a46 100644 --- a/pandas/tests/apply/test_str.py +++ b/pandas/tests/apply/test_str.py @@ -38,15 +38,8 @@ def test_apply_with_string_funcs(request, float_frame, func, args, kwds, how): "argument to func", ) ) - if args: - with pytest.warns( - FutureWarning, match="Starting with pandas version 3.0 all arguments of" - ): - result = getattr(float_frame, how)(func, *args, **kwds) - expected = getattr(float_frame, func)(*args, **kwds) - else: - result = getattr(float_frame, how)(func, *args, **kwds) - expected = getattr(float_frame, func)(*args, **kwds) + result = getattr(float_frame, how)(func, *args, **kwds) + expected = getattr(float_frame, func)(*args, **kwds) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_asof.py b/pandas/tests/frame/methods/test_asof.py index bed126d25eeda..4a8adf89b3aef 100644 --- a/pandas/tests/frame/methods/test_asof.py +++ b/pandas/tests/frame/methods/test_asof.py @@ -36,18 +36,18 @@ def test_basic(self, date_range_frame): dates = date_range("1/1/1990", periods=N * 3, freq="25s") result = df.asof(dates) - assert result.notna().all(axis=None) + assert result.notna().all(1).all() lb = df.index[14] ub = df.index[30] dates = list(dates) result = df.asof(dates) - assert result.notna().all(axis=None) + assert result.notna().all(1).all() mask = (result.index >= lb) & (result.index < ub) rs = result[mask] - assert (rs == 14).all(axis=None) + assert (rs == 14).all(1).all() def test_subset(self, date_range_frame): N = 10 diff --git a/pandas/tests/frame/methods/test_fillna.py b/pandas/tests/frame/methods/test_fillna.py index fea5ab350bbc9..4f661b14ef201 100644 --- a/pandas/tests/frame/methods/test_fillna.py +++ b/pandas/tests/frame/methods/test_fillna.py @@ -576,7 +576,7 @@ def test_fillna_dict_series(self): # disable this for now with pytest.raises(NotImplementedError, match="column by column"): - df.fillna(df.max(axis=1), axis=1) + df.fillna(df.max(1), axis=1) def test_fillna_dataframe(self): # GH#8377 diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py index d3934c73c4769..63c15fab76562 100644 --- a/pandas/tests/frame/test_reductions.py +++ b/pandas/tests/frame/test_reductions.py @@ -773,7 +773,7 @@ def test_operators_timedelta64(self): tm.assert_series_equal(result, expected) # works when only those columns are selected - result = mixed[["A", "B"]].min(axis=1) + result = mixed[["A", "B"]].min(1) expected = Series([timedelta(days=-1)] * 3) tm.assert_series_equal(result, expected) @@ -832,8 +832,8 @@ def test_std_datetime64_with_nat(self, values, skipna, request, unit): def test_sum_corner(self): empty_frame = DataFrame() - axis0 = empty_frame.sum(axis=0) - axis1 = empty_frame.sum(axis=1) + axis0 = empty_frame.sum(0) + axis1 = empty_frame.sum(1) assert isinstance(axis0, Series) assert isinstance(axis1, Series) assert len(axis0) == 0 @@ -967,8 +967,8 @@ def test_sum_object(self, float_frame): def test_sum_bool(self, float_frame): # ensure this works, bug report bools = np.isnan(float_frame) - bools.sum(axis=1) - bools.sum(axis=0) + bools.sum(1) + bools.sum(0) def test_sum_mixed_datetime(self): # GH#30886 @@ -990,7 +990,7 @@ def test_mean_corner(self, float_frame, float_string_frame): # take mean of boolean column float_frame["bool"] = float_frame["A"] > 0 - means = float_frame.mean(axis=0) + means = float_frame.mean(0) assert means["bool"] == float_frame["bool"].values.mean() def test_mean_datetimelike(self): @@ -1043,13 +1043,13 @@ def test_mean_extensionarray_numeric_only_true(self): def test_stats_mixed_type(self, float_string_frame): with pytest.raises(TypeError, match="could not convert"): - float_string_frame.std(axis=1) + float_string_frame.std(1) with pytest.raises(TypeError, match="could not convert"): - float_string_frame.var(axis=1) + float_string_frame.var(1) with pytest.raises(TypeError, match="unsupported operand type"): - float_string_frame.mean(axis=1) + float_string_frame.mean(1) with pytest.raises(TypeError, match="could not convert"): - float_string_frame.skew(axis=1) + float_string_frame.skew(1) def test_sum_bools(self): df = DataFrame(index=range(1), columns=range(10)) @@ -1329,11 +1329,11 @@ def test_any_all_extra(self): result = df[["A", "B"]].any(axis=1, bool_only=True) tm.assert_series_equal(result, expected) - result = df.all(axis=1) + result = df.all(1) expected = Series([True, False, False], index=["a", "b", "c"]) tm.assert_series_equal(result, expected) - result = df.all(axis=1, bool_only=True) + result = df.all(1, bool_only=True) tm.assert_series_equal(result, expected) # Axis is None diff --git a/pandas/tests/reductions/test_reductions.py b/pandas/tests/reductions/test_reductions.py index 3e402255c3a6f..20749c7ed90e8 100644 --- a/pandas/tests/reductions/test_reductions.py +++ b/pandas/tests/reductions/test_reductions.py @@ -686,7 +686,7 @@ def test_empty(self, method, unit, use_bottleneck, dtype): # GH#844 (changed in GH#9422) df = DataFrame(np.empty((10, 0)), dtype=dtype) - assert (getattr(df, method)(axis=1) == unit).all() + assert (getattr(df, method)(1) == unit).all() s = Series([1], dtype=dtype) result = getattr(s, method)(min_count=2) diff --git a/pandas/tests/series/test_api.py b/pandas/tests/series/test_api.py index c27252dd4d6c6..29d6e2036476e 100644 --- a/pandas/tests/series/test_api.py +++ b/pandas/tests/series/test_api.py @@ -107,7 +107,7 @@ def test_contains(self, datetime_series): def test_axis_alias(self): s = Series([1, 2, np.nan]) tm.assert_series_equal(s.dropna(axis="rows"), s.dropna(axis="index")) - assert s.dropna().sum(axis="rows") == 3 + assert s.dropna().sum("rows") == 3 assert s._get_axis_number("rows") == 0 assert s._get_axis_name("rows") == "index" diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index e25077f6a3ad6..fda51b157cd75 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -141,7 +141,7 @@ def test_multilevel_consolidate(self): df = DataFrame( np.random.default_rng(2).standard_normal((4, 4)), index=index, columns=index ) - df["Totals", ""] = df.sum(axis=1) + df["Totals", ""] = df.sum(1) df = df._consolidate() def test_level_with_tuples(self): From 9343d12ba91394b1e7301258b9d511d7b2d18419 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Fri, 9 Feb 2024 21:06:27 -0500 Subject: [PATCH 3/5] revert keyword-only in NDFrame --- pandas/core/generic.py | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index f458e058c9755..3c71784ad81c4 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -11805,7 +11805,6 @@ def _logical_func( def any( self, - *, axis: Axis | None = 0, bool_only: bool_t = False, skipna: bool_t = True, @@ -11817,7 +11816,6 @@ def any( def all( self, - *, axis: Axis = 0, bool_only: bool_t = False, skipna: bool_t = True, @@ -11921,7 +11919,6 @@ def _stat_function_ddof( def sem( self, - *, axis: Axis | None = 0, skipna: bool_t = True, ddof: int = 1, @@ -11934,7 +11931,6 @@ def sem( def var( self, - *, axis: Axis | None = 0, skipna: bool_t = True, ddof: int = 1, @@ -11947,7 +11943,6 @@ def var( def std( self, - *, axis: Axis | None = 0, skipna: bool_t = True, ddof: int = 1, @@ -11979,7 +11974,6 @@ def _stat_function( def min( self, - *, axis: Axis | None = 0, skipna: bool_t = True, numeric_only: bool_t = False, @@ -11996,7 +11990,6 @@ def min( def max( self, - *, axis: Axis | None = 0, skipna: bool_t = True, numeric_only: bool_t = False, @@ -12013,7 +12006,6 @@ def max( def mean( self, - *, axis: Axis | None = 0, skipna: bool_t = True, numeric_only: bool_t = False, @@ -12025,7 +12017,6 @@ def mean( def median( self, - *, axis: Axis | None = 0, skipna: bool_t = True, numeric_only: bool_t = False, @@ -12037,7 +12028,6 @@ def median( def skew( self, - *, axis: Axis | None = 0, skipna: bool_t = True, numeric_only: bool_t = False, @@ -12049,7 +12039,6 @@ def skew( def kurt( self, - *, axis: Axis | None = 0, skipna: bool_t = True, numeric_only: bool_t = False, @@ -12102,7 +12091,6 @@ def _min_count_stat_function( def sum( self, - *, axis: Axis | None = 0, skipna: bool_t = True, numeric_only: bool_t = False, @@ -12115,7 +12103,6 @@ def sum( def prod( self, - *, axis: Axis | None = 0, skipna: bool_t = True, numeric_only: bool_t = False, From aa396359b3433d6611b735e7b58cac5d0cdd04ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Fri, 9 Feb 2024 22:06:48 -0500 Subject: [PATCH 4/5] revert revert --- pandas/core/generic.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 3c71784ad81c4..f458e058c9755 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -11805,6 +11805,7 @@ def _logical_func( def any( self, + *, axis: Axis | None = 0, bool_only: bool_t = False, skipna: bool_t = True, @@ -11816,6 +11817,7 @@ def any( def all( self, + *, axis: Axis = 0, bool_only: bool_t = False, skipna: bool_t = True, @@ -11919,6 +11921,7 @@ def _stat_function_ddof( def sem( self, + *, axis: Axis | None = 0, skipna: bool_t = True, ddof: int = 1, @@ -11931,6 +11934,7 @@ def sem( def var( self, + *, axis: Axis | None = 0, skipna: bool_t = True, ddof: int = 1, @@ -11943,6 +11947,7 @@ def var( def std( self, + *, axis: Axis | None = 0, skipna: bool_t = True, ddof: int = 1, @@ -11974,6 +11979,7 @@ def _stat_function( def min( self, + *, axis: Axis | None = 0, skipna: bool_t = True, numeric_only: bool_t = False, @@ -11990,6 +11996,7 @@ def min( def max( self, + *, axis: Axis | None = 0, skipna: bool_t = True, numeric_only: bool_t = False, @@ -12006,6 +12013,7 @@ def max( def mean( self, + *, axis: Axis | None = 0, skipna: bool_t = True, numeric_only: bool_t = False, @@ -12017,6 +12025,7 @@ def mean( def median( self, + *, axis: Axis | None = 0, skipna: bool_t = True, numeric_only: bool_t = False, @@ -12028,6 +12037,7 @@ def median( def skew( self, + *, axis: Axis | None = 0, skipna: bool_t = True, numeric_only: bool_t = False, @@ -12039,6 +12049,7 @@ def skew( def kurt( self, + *, axis: Axis | None = 0, skipna: bool_t = True, numeric_only: bool_t = False, @@ -12091,6 +12102,7 @@ def _min_count_stat_function( def sum( self, + *, axis: Axis | None = 0, skipna: bool_t = True, numeric_only: bool_t = False, @@ -12103,6 +12115,7 @@ def sum( def prod( self, + *, axis: Axis | None = 0, skipna: bool_t = True, numeric_only: bool_t = False, From 7a68720d17b8d379fc6cf668ad4821b23e49c60a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Mon, 12 Feb 2024 17:34:09 -0500 Subject: [PATCH 5/5] remove decorators in Series --- pandas/core/series.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 82bd66245eef4..8093f9aa70cba 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -6529,7 +6529,6 @@ def sem( **kwargs, ) - @deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="var") @doc(make_doc("var", ndim=1)) def var( self, @@ -6548,7 +6547,6 @@ def var( **kwargs, ) - @deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="std") @doc(make_doc("std", ndim=1)) def std( self, @@ -6567,7 +6565,6 @@ def std( **kwargs, ) - @deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="skew") @doc(make_doc("skew", ndim=1)) def skew( self, @@ -6580,7 +6577,6 @@ def skew( self, axis=axis, skipna=skipna, numeric_only=numeric_only, **kwargs ) - @deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="kurt") @doc(make_doc("kurt", ndim=1)) def kurt( self,