diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 57dce003c2846..86144391a6f1a 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -666,6 +666,7 @@ Performance improvements - Performance improvement in :meth:`DataFrame.stack` when using ``future_stack=True`` and the DataFrame does not have a :class:`MultiIndex` (:issue:`58391`) - Performance improvement in :meth:`DataFrame.where` when ``cond`` is a :class:`DataFrame` with many columns (:issue:`61010`) - Performance improvement in :meth:`to_hdf` avoid unnecessary reopenings of the HDF5 file to speedup data addition to files with a very large number of groups . (:issue:`58248`) +- Performance improvement in ``DataFrame.plot(kind="line")``: very wide DataFrames (more than 200 columns) are now rendered with a single :class:`matplotlib.collections.LineCollection` instead of one ``Line2D`` per column, reducing draw time by roughly 7 × on a 2000-column frame. (:issue:`61532`) - Performance improvement in ``DataFrameGroupBy.__len__`` and ``SeriesGroupBy.__len__`` (:issue:`57595`) - Performance improvement in indexing operations for string dtypes (:issue:`56997`) - Performance improvement in unary methods on a :class:`RangeIndex` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57825`) diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index 1c7e1ab57b2a9..5d2fa8847a210 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -99,6 +99,10 @@ Series, ) +import itertools + +from matplotlib.collections import LineCollection + def holds_integer(column: Index) -> bool: return column.inferred_type in {"integer", "mixed-integer"} @@ -1549,6 +1553,30 @@ def __init__(self, data, **kwargs) -> None: self.data = self.data.fillna(value=0) def _make_plot(self, fig: Figure) -> None: + threshold = 200 # switch when DataFrame has more than this many columns + can_use_lc = ( + not self._is_ts_plot() # not a TS plot + and not self.stacked # stacking not requested + and not com.any_not_none(*self.errors.values()) # no error bars + and len(self.data.columns) > threshold + ) + if can_use_lc: + ax = self._get_ax(0) + x = self._get_xticks() + segments = [ + np.column_stack((x, self.data[col].values)) for col in self.data.columns + ] + base_colors = mpl.rcParams["axes.prop_cycle"].by_key()["color"] + colors = list(itertools.islice(itertools.cycle(base_colors), len(segments))) + lc = LineCollection( + segments, + colors=colors, + linewidths=self.kwds.get("linewidth", mpl.rcParams["lines.linewidth"]), + ) + ax.add_collection(lc) + ax.margins(0.05) + return # skip the per-column Line2D loop + if self._is_ts_plot(): data = maybe_convert_index(self._get_ax(0), self.data) diff --git a/pandas/tests/plotting/frame/test_linecollection_speedup.py b/pandas/tests/plotting/frame/test_linecollection_speedup.py new file mode 100644 index 0000000000000..49b4f4502f4e6 --- /dev/null +++ b/pandas/tests/plotting/frame/test_linecollection_speedup.py @@ -0,0 +1,27 @@ +""" +Ensure wide DataFrame.line plots use a single LineCollection +instead of one Line2D per column (PR #61764). +""" + +import numpy as np +import pytest + +import pandas as pd + +# Skip this entire module if matplotlib is not installed +mpl = pytest.importorskip("matplotlib") +plt = pytest.importorskip("matplotlib.pyplot") +from matplotlib.collections import LineCollection + + +def test_linecollection_used_for_wide_dataframe(): + rng = np.random.default_rng(0) + df = pd.DataFrame(rng.standard_normal((10, 201)).cumsum(axis=0)) + + ax = df.plot(legend=False) + + # exactly one LineCollection, and no Line2D artists + assert sum(isinstance(c, LineCollection) for c in ax.collections) == 1 + assert len(ax.lines) == 0 + + plt.close(ax.figure)