From bcea8ffee538e2ee35e9b8300b01b75202961ff7 Mon Sep 17 00:00:00 2001 From: tommyod Date: Sat, 13 Jan 2018 19:32:15 +0100 Subject: [PATCH 1/8] Exposed arguments in plot.kde, added number of sample points as option --- pandas/plotting/_core.py | 35 ++++++++++++++++++++++++++++++----- 1 file changed, 30 insertions(+), 5 deletions(-) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index 88b899ad60313..715369fe1d00a 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -1398,6 +1398,10 @@ def _get_ind(self, y): sample_range = np.nanmax(y) - np.nanmin(y) ind = np.linspace(np.nanmin(y) - 0.5 * sample_range, np.nanmax(y) + 0.5 * sample_range, 1000) + elif isinstance(self.ind, (int, np.int)): + sample_range = np.nanmax(y) - np.nanmin(y) + ind = np.linspace(np.nanmin(y) - 0.5 * sample_range, + np.nanmax(y) + 0.5 * sample_range, self.ind) else: ind = self.ind return ind @@ -2598,12 +2602,22 @@ def hist(self, bins=10, **kwds): """ return self(kind='hist', bins=bins, **kwds) - def kde(self, **kwds): + def kde(self, bw_method = None, ind = None, **kwds): """ Kernel Density Estimate plot Parameters ---------- + bw_method: str, scalar or callable, optional + The method used to calculate the estimator bandwidth. This can be + 'scott', 'silverman', a scalar constant or a callable. + If None (default), 'scott' is used. + See :scipy:class:`stats.gaussian_kde` for more information. + ind : NumPy array or integer, optional + Evaluation points. If None (default), 1000 equally spaced points + are used. If `ind` is a NumPy array, the kde is evaluated at the + points passed. If `ind` is an integer, `ind` number of equally + spaced points are used. `**kwds` : optional Keyword arguments to pass on to :py:meth:`pandas.Series.plot`. 
@@ -2611,7 +2625,7 @@ def kde(self, **kwds): ------- axes : matplotlib.AxesSubplot or np.array of them """ - return self(kind='kde', **kwds) + return self(kind='kde', bw_method = bw_method, ind = ind, **kwds) density = kde @@ -2766,20 +2780,30 @@ def hist(self, by=None, bins=10, **kwds): """ return self(kind='hist', by=by, bins=bins, **kwds) - def kde(self, **kwds): + def kde(self, bw_method = None, ind = None, **kwds): """ Kernel Density Estimate plot Parameters ---------- + bw_method: str, scalar or callable, optional + The method used to calculate the estimator bandwidth. This can be + 'scott', 'silverman', a scalar constant or a callable. + If None (default), 'scott' is used. + See :scipy:class:`stats.gaussian_kde` for more information. + ind : NumPy array or integer, optional + Evaluation points. If None (default), 1000 equally spaced points + are used. If `ind` is a NumPy array, the kde is evaluated at the + points passed. If `ind` is an integer, `ind` number of equally + spaced points are used. `**kwds` : optional - Keyword arguments to pass on to :py:meth:`pandas.DataFrame.plot`. + Keyword arguments to pass on to :py:meth:`pandas.Series.plot`. 
Returns ------- axes : matplotlib.AxesSubplot or np.array of them """ - return self(kind='kde', **kwds) + return self(kind='kde', bw_method = bw_method, ind = ind, **kwds) density = kde @@ -2866,3 +2890,4 @@ def hexbin(self, x, y, C=None, reduce_C_function=None, gridsize=None, if gridsize is not None: kwds['gridsize'] = gridsize return self(kind='hexbin', x=x, y=y, C=C, **kwds) + From 1961718d73fe9809bed49502d02474595f0d87eb Mon Sep 17 00:00:00 2001 From: tommyod Date: Sat, 13 Jan 2018 19:33:42 +0100 Subject: [PATCH 2/8] Added a test for plot.kde with `ind` as an integer --- pandas/tests/plotting/test_series.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/tests/plotting/test_series.py b/pandas/tests/plotting/test_series.py index 2458fc0dc992c..a3277a7d3b07c 100644 --- a/pandas/tests/plotting/test_series.py +++ b/pandas/tests/plotting/test_series.py @@ -622,6 +622,7 @@ def test_kde_kwargs(self): pytest.skip("mpl is not supported") from numpy import linspace + _check_plot_works(self.ts.plot.kde, bw_method=None, ind=20) _check_plot_works(self.ts.plot.kde, bw_method=.5, ind=linspace(-100, 100, 20)) _check_plot_works(self.ts.plot.density, bw_method=.5, @@ -897,3 +898,4 @@ def test_custom_business_day_freq(self): freq=CustomBusinessDay(holidays=['2014-05-26']))) _check_plot_works(s.plot) + From 97e7b839b3f2802876512f689a05d4a21c7b7300 Mon Sep 17 00:00:00 2001 From: tommyod Date: Sun, 14 Jan 2018 07:54:52 +0100 Subject: [PATCH 3/8] Added whatsnew. Fixed flake8 errors. Used is_integer to infer type. 
--- doc/source/whatsnew/v0.23.0.txt | 2 +- pandas/plotting/_core.py | 15 +++++++-------- pandas/tests/plotting/test_series.py | 2 +- 3 files changed, 9 insertions(+), 10 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 66e88e181ac0f..cb49b386557af 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -536,7 +536,7 @@ Plotting - :func: `DataFrame.plot` now raises a ``ValueError`` when the ``x`` or ``y`` argument is improperly formed (:issue:`18671`) - Bug in formatting tick labels with ``datetime.time()`` and fractional seconds (:issue:`18478`). -- +- The arguments ``ind`` and ``bw_method`` are added to the docstring of :meth:`Series.plot.kde` (:issue:`18461`). The argument ``ind`` may now also be an integer (number of sample points). - Groupby/Resample/Rolling diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index 715369fe1d00a..c5ae1d6073b86 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -1398,7 +1398,7 @@ def _get_ind(self, y): sample_range = np.nanmax(y) - np.nanmin(y) ind = np.linspace(np.nanmin(y) - 0.5 * sample_range, np.nanmax(y) + 0.5 * sample_range, 1000) - elif isinstance(self.ind, (int, np.int)): + elif is_integer(self.ind): sample_range = np.nanmax(y) - np.nanmin(y) ind = np.linspace(np.nanmin(y) - 0.5 * sample_range, np.nanmax(y) + 0.5 * sample_range, self.ind) @@ -2602,7 +2602,7 @@ def hist(self, bins=10, **kwds): """ return self(kind='hist', bins=bins, **kwds) - def kde(self, bw_method = None, ind = None, **kwds): + def kde(self, bw_method=None, ind=None, **kwds): """ Kernel Density Estimate plot @@ -2616,7 +2616,7 @@ def kde(self, bw_method = None, ind = None, **kwds): ind : NumPy array or integer, optional Evaluation points. If None (default), 1000 equally spaced points are used. If `ind` is a NumPy array, the kde is evaluated at the - points passed. If `ind` is an integer, `ind` number of equally + points passed. 
If `ind` is an integer, `ind` number of equally spaced points are used. `**kwds` : optional Keyword arguments to pass on to :py:meth:`pandas.Series.plot`. @@ -2625,7 +2625,7 @@ def kde(self, bw_method = None, ind = None, **kwds): ------- axes : matplotlib.AxesSubplot or np.array of them """ - return self(kind='kde', bw_method = bw_method, ind = ind, **kwds) + return self(kind='kde', bw_method=bw_method, ind=ind, **kwds) density = kde @@ -2780,7 +2780,7 @@ def hist(self, by=None, bins=10, **kwds): """ return self(kind='hist', by=by, bins=bins, **kwds) - def kde(self, bw_method = None, ind = None, **kwds): + def kde(self, bw_method=None, ind=None, **kwds): """ Kernel Density Estimate plot @@ -2794,7 +2794,7 @@ def kde(self, bw_method = None, ind = None, **kwds): ind : NumPy array or integer, optional Evaluation points. If None (default), 1000 equally spaced points are used. If `ind` is a NumPy array, the kde is evaluated at the - points passed. If `ind` is an integer, `ind` number of equally + points passed. If `ind` is an integer, `ind` number of equally spaced points are used. `**kwds` : optional Keyword arguments to pass on to :py:meth:`pandas.Series.plot`. 
@@ -2803,7 +2803,7 @@ def kde(self, bw_method = None, ind = None, **kwds): ------- axes : matplotlib.AxesSubplot or np.array of them """ - return self(kind='kde', bw_method = bw_method, ind = ind, **kwds) + return self(kind='kde', bw_method=bw_method, ind=ind, **kwds) density = kde @@ -2890,4 +2890,3 @@ def hexbin(self, x, y, C=None, reduce_C_function=None, gridsize=None, if gridsize is not None: kwds['gridsize'] = gridsize return self(kind='hexbin', x=x, y=y, C=C, **kwds) - diff --git a/pandas/tests/plotting/test_series.py b/pandas/tests/plotting/test_series.py index a3277a7d3b07c..762f27345ecdd 100644 --- a/pandas/tests/plotting/test_series.py +++ b/pandas/tests/plotting/test_series.py @@ -623,6 +623,7 @@ def test_kde_kwargs(self): from numpy import linspace _check_plot_works(self.ts.plot.kde, bw_method=None, ind=20) + _check_plot_works(self.ts.plot.kde, bw_method=None, ind=np.int(20)) _check_plot_works(self.ts.plot.kde, bw_method=.5, ind=linspace(-100, 100, 20)) _check_plot_works(self.ts.plot.density, bw_method=.5, @@ -898,4 +899,3 @@ def test_custom_business_day_freq(self): freq=CustomBusinessDay(holidays=['2014-05-26']))) _check_plot_works(s.plot) - From 1d14492f4f68c7011943cbb53fa74cbc0ca66f81 Mon Sep 17 00:00:00 2001 From: tommyod Date: Sat, 20 Jan 2018 14:23:15 +0100 Subject: [PATCH 4/8] Updated scipy reference --- pandas/plotting/_core.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index c5ae1d6073b86..ef5f42f0bb54f 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -2612,7 +2612,7 @@ def kde(self, bw_method=None, ind=None, **kwds): The method used to calculate the estimator bandwidth. This can be 'scott', 'silverman', a scalar constant or a callable. If None (default), 'scott' is used. - See :scipy:class:`stats.gaussian_kde` for more information. + See :class:`scipy.stats.gaussian_kde` for more information. 
ind : NumPy array or integer, optional Evaluation points. If None (default), 1000 equally spaced points are used. If `ind` is a NumPy array, the kde is evaluated at the @@ -2790,7 +2790,7 @@ def kde(self, bw_method=None, ind=None, **kwds): The method used to calculate the estimator bandwidth. This can be 'scott', 'silverman', a scalar constant or a callable. If None (default), 'scott' is used. - See :scipy:class:`stats.gaussian_kde` for more information. + See :class:`scipy.stats.gaussian_kde` for more information. ind : NumPy array or integer, optional Evaluation points. If None (default), 1000 equally spaced points are used. If `ind` is a NumPy array, the kde is evaluated at the From 0e9cb3790de7cca2b42d33d6ee90f7d07f74d8e3 Mon Sep 17 00:00:00 2001 From: tommyod Date: Sun, 21 Jan 2018 20:45:57 +0100 Subject: [PATCH 5/8] Added test, rewrote whatsnew, removed import --- doc/source/whatsnew/v0.23.0.txt | 2 +- pandas/tests/plotting/test_series.py | 12 +++++------- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index cb49b386557af..5feebe3ba2a22 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -536,7 +536,7 @@ Plotting - :func: `DataFrame.plot` now raises a ``ValueError`` when the ``x`` or ``y`` argument is improperly formed (:issue:`18671`) - Bug in formatting tick labels with ``datetime.time()`` and fractional seconds (:issue:`18478`). -- The arguments ``ind`` and ``bw_method`` are added to the docstring of :meth:`Series.plot.kde` (:issue:`18461`). The argument ``ind`` may now also be an integer (number of sample points). +- :meth:`Series.plot.kde` has exposed the args ``ind`` and ``bw_method`` in the docstring (:issue:`18461`). The argument ``ind`` may now also be an integer (number of sample points). 
- Groupby/Resample/Rolling diff --git a/pandas/tests/plotting/test_series.py b/pandas/tests/plotting/test_series.py index 762f27345ecdd..59de66beffb90 100644 --- a/pandas/tests/plotting/test_series.py +++ b/pandas/tests/plotting/test_series.py @@ -621,16 +621,14 @@ def test_kde_kwargs(self): if not self.mpl_ge_1_5_0: pytest.skip("mpl is not supported") - from numpy import linspace + sample_points = np.linspace(-100, 100, 20) + _check_plot_works(self.ts.plot.kde, bw_method='scott', ind=20) _check_plot_works(self.ts.plot.kde, bw_method=None, ind=20) _check_plot_works(self.ts.plot.kde, bw_method=None, ind=np.int(20)) - _check_plot_works(self.ts.plot.kde, bw_method=.5, - ind=linspace(-100, 100, 20)) - _check_plot_works(self.ts.plot.density, bw_method=.5, - ind=linspace(-100, 100, 20)) + _check_plot_works(self.ts.plot.kde, bw_method=.5, ind=sample_points) + _check_plot_works(self.ts.plot.density, bw_method=.5, ind=sample_points) _, ax = self.plt.subplots() - ax = self.ts.plot.kde(logy=True, bw_method=.5, - ind=linspace(-100, 100, 20), ax=ax) + ax = self.ts.plot.kde(logy=True, bw_method=.5, ind=sample_points, ax=ax) self._check_ax_scales(ax, yaxis='log') self._check_text_labels(ax.yaxis.get_label(), 'Density') From b2d3ab15c9538722a8efcdc6c122e85ae0899ba0 Mon Sep 17 00:00:00 2001 From: tommyod Date: Sun, 21 Jan 2018 21:17:05 +0100 Subject: [PATCH 6/8] Changed from Series to DataFrame in doc --- pandas/plotting/_core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index ef5f42f0bb54f..b15c5271ae321 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -2797,7 +2797,7 @@ def kde(self, bw_method=None, ind=None, **kwds): points passed. If `ind` is an integer, `ind` number of equally spaced points are used. `**kwds` : optional - Keyword arguments to pass on to :py:meth:`pandas.Series.plot`. + Keyword arguments to pass on to :py:meth:`pandas.DataFrame.plot`. 
Returns ------- From a56b19b0c1e3e05fee8199e23b28683aef0db3e1 Mon Sep 17 00:00:00 2001 From: tommyod Date: Thu, 1 Feb 2018 06:26:01 +0100 Subject: [PATCH 7/8] Fixed PEP8 errors in test file --- pandas/tests/plotting/test_series.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/tests/plotting/test_series.py b/pandas/tests/plotting/test_series.py index 59de66beffb90..07b985c6643ef 100644 --- a/pandas/tests/plotting/test_series.py +++ b/pandas/tests/plotting/test_series.py @@ -626,9 +626,11 @@ def test_kde_kwargs(self): _check_plot_works(self.ts.plot.kde, bw_method=None, ind=20) _check_plot_works(self.ts.plot.kde, bw_method=None, ind=np.int(20)) _check_plot_works(self.ts.plot.kde, bw_method=.5, ind=sample_points) - _check_plot_works(self.ts.plot.density, bw_method=.5, ind=sample_points) + _check_plot_works(self.ts.plot.density, bw_method=.5, + flakind=sample_points) _, ax = self.plt.subplots() - ax = self.ts.plot.kde(logy=True, bw_method=.5, ind=sample_points, ax=ax) + ax = self.ts.plot.kde(logy=True, bw_method=.5, ind=sample_points, + ax=ax) self._check_ax_scales(ax, yaxis='log') self._check_text_labels(ax.yaxis.get_label(), 'Density') From 826a124ad200468e6eb7deffd3dae1deda0da73e Mon Sep 17 00:00:00 2001 From: tommyod Date: Thu, 1 Feb 2018 15:11:51 +0100 Subject: [PATCH 8/8] Fixed typo which made tests crash --- pandas/tests/plotting/test_series.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/plotting/test_series.py b/pandas/tests/plotting/test_series.py index 07b985c6643ef..278be433183fa 100644 --- a/pandas/tests/plotting/test_series.py +++ b/pandas/tests/plotting/test_series.py @@ -627,7 +627,7 @@ def test_kde_kwargs(self): _check_plot_works(self.ts.plot.kde, bw_method=None, ind=np.int(20)) _check_plot_works(self.ts.plot.kde, bw_method=.5, ind=sample_points) _check_plot_works(self.ts.plot.density, bw_method=.5, - flakind=sample_points) + ind=sample_points) _, ax = self.plt.subplots() ax = 
self.ts.plot.kde(logy=True, bw_method=.5, ind=sample_points, ax=ax)