From 7e9cbd81d2972109a73f564af81315954b68c379 Mon Sep 17 00:00:00 2001 From: evgenii Date: Thu, 3 Jul 2025 10:43:09 +0300 Subject: [PATCH 01/10] STYLE: apply ruff / isort auto-formatting on core.py --- pandas/plotting/_matplotlib/core.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index 1c7e1ab57b2a9..5d2fa8847a210 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -99,6 +99,10 @@ Series, ) +import itertools + +from matplotlib.collections import LineCollection + def holds_integer(column: Index) -> bool: return column.inferred_type in {"integer", "mixed-integer"} @@ -1549,6 +1553,30 @@ def __init__(self, data, **kwargs) -> None: self.data = self.data.fillna(value=0) def _make_plot(self, fig: Figure) -> None: + threshold = 200 # switch when DataFrame has more than this many columns + can_use_lc = ( + not self._is_ts_plot() # not a TS plot + and not self.stacked # stacking not requested + and not com.any_not_none(*self.errors.values()) # no error bars + and len(self.data.columns) > threshold + ) + if can_use_lc: + ax = self._get_ax(0) + x = self._get_xticks() + segments = [ + np.column_stack((x, self.data[col].values)) for col in self.data.columns + ] + base_colors = mpl.rcParams["axes.prop_cycle"].by_key()["color"] + colors = list(itertools.islice(itertools.cycle(base_colors), len(segments))) + lc = LineCollection( + segments, + colors=colors, + linewidths=self.kwds.get("linewidth", mpl.rcParams["lines.linewidth"]), + ) + ax.add_collection(lc) + ax.margins(0.05) + return # skip the per-column Line2D loop + if self._is_ts_plot(): data = maybe_convert_index(self._get_ax(0), self.data) From 6910da7376abf4fbcbf7b079b864ba12269b0af5 Mon Sep 17 00:00:00 2001 From: evgenii Date: Thu, 3 Jul 2025 11:48:51 +0300 Subject: [PATCH 02/10] TST: use default_rng to satisfy Ruff NPY002 --- .../frame/test_linecollection_speedup.py | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 pandas/tests/plotting/frame/test_linecollection_speedup.py diff --git a/pandas/tests/plotting/frame/test_linecollection_speedup.py b/pandas/tests/plotting/frame/test_linecollection_speedup.py new file mode 100644 index 0000000000000..6af3a74504969 --- /dev/null +++ b/pandas/tests/plotting/frame/test_linecollection_speedup.py @@ -0,0 +1,23 @@ +""" +Ensure wide DataFrame.line plots use a single LineCollection +instead of one Line2D per column (PR #61764). +""" + +from matplotlib.collections import LineCollection +import matplotlib.pyplot as plt +import numpy as np + +import pandas as pd + + +def test_linecollection_used_for_wide_dataframe(): + rng = np.random.default_rng(0) + df = pd.DataFrame(rng.standard_normal((10, 201)).cumsum(axis=0)) + + ax = df.plot(legend=False) + + # one LineCollection, zero Line2D objects + assert sum(isinstance(c, LineCollection) for c in ax.collections) == 1 + assert len(ax.lines) == 0 + + plt.close(ax.figure) From 8b7b0df9f35e134fb03b6dd2d9cfe4ec8ec66042 Mon Sep 17 00:00:00 2001 From: evgenii Date: Thu, 3 Jul 2025 11:57:21 +0300 Subject: [PATCH 03/10] DOC: add Performance improvement bullet for LineCollection speed-up --- doc/source/whatsnew/v3.0.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 4154942f92907..ef383404f82c7 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -670,6 +670,7 @@ Performance improvements - Performance improvement in ``DataFrameGroupBy.__len__`` and ``SeriesGroupBy.__len__`` (:issue:`57595`) - Performance improvement in indexing operations for string dtypes (:issue:`56997`) - Performance improvement in unary methods on a :class:`RangeIndex` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57825`) +- Performance improvement in ``DataFrame.plot(kind="line")``: very wide DataFrames (more than 200 columns) are now rendered with a single :class:`matplotlib.collections.LineCollection` instead of one ``Line2D`` per column, reducing draw time by roughly 7 × on a 2000-column frame. (:issue:`61532`, :pr:`61764`) .. --------------------------------------------------------------------------- .. _whatsnew_300.bug_fixes: From d9ac7a60df76c6c2d4f7e54d3c407db55b2246b3 Mon Sep 17 00:00:00 2001 From: evgenii Date: Thu, 3 Jul 2025 12:04:05 +0300 Subject: [PATCH 04/10] TST: skip speedup test when matplotlib is not installed --- .../plotting/frame/test_linecollection_speedup.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/pandas/tests/plotting/frame/test_linecollection_speedup.py b/pandas/tests/plotting/frame/test_linecollection_speedup.py index 6af3a74504969..49b4f4502f4e6 100644 --- a/pandas/tests/plotting/frame/test_linecollection_speedup.py +++ b/pandas/tests/plotting/frame/test_linecollection_speedup.py @@ -3,12 +3,16 @@ instead of one Line2D per column (PR #61764). """ -from matplotlib.collections import LineCollection -import matplotlib.pyplot as plt import numpy as np +import pytest import pandas as pd +# Skip this entire module if matplotlib is not installed +mpl = pytest.importorskip("matplotlib") +plt = pytest.importorskip("matplotlib.pyplot") +from matplotlib.collections import LineCollection + def test_linecollection_used_for_wide_dataframe(): rng = np.random.default_rng(0) @@ -16,7 +20,7 @@ def test_linecollection_used_for_wide_dataframe(): ax = df.plot(legend=False) - # one LineCollection, zero Line2D objects + # exactly one LineCollection, and no Line2D artists assert sum(isinstance(c, LineCollection) for c in ax.collections) == 1 assert len(ax.lines) == 0 From a490e241c3cb0e0e2e24a3ccda1ee9e6376d5f5f Mon Sep 17 00:00:00 2001 From: evgenii Date: Thu, 3 Jul 2025 12:54:37 +0300 Subject: [PATCH 05/10] DOC: whatsnew entry for LineCollection speed-up --- doc/source/whatsnew/v3.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index ef383404f82c7..238b76dd2039a 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -667,10 +667,10 @@ Performance improvements - Performance improvement in :meth:`DataFrame.stack` when using ``future_stack=True`` and the DataFrame does not have a :class:`MultiIndex` (:issue:`58391`) - Performance improvement in :meth:`DataFrame.where` when ``cond`` is a :class:`DataFrame` with many columns (:issue:`61010`) - Performance improvement in :meth:`to_hdf` avoid unnecessary reopenings of the HDF5 file to speedup data addition to files with a very large number of groups . (:issue:`58248`) +- Performance improvement in ``DataFrame.plot(kind="line")``: very wide DataFrames (more than 200 columns) are now rendered with a single :class:`matplotlib.collections.LineCollection` instead of one ``Line2D`` per column, reducing draw time by roughly 7 × on a 2000-column frame. (:issue:`61532`) - Performance improvement in ``DataFrameGroupBy.__len__`` and ``SeriesGroupBy.__len__`` (:issue:`57595`) - Performance improvement in indexing operations for string dtypes (:issue:`56997`) - Performance improvement in unary methods on a :class:`RangeIndex` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57825`) -- Performance improvement in ``DataFrame.plot(kind="line")``: very wide DataFrames (more than 200 columns) are now rendered with a single :class:`matplotlib.collections.LineCollection` instead of one ``Line2D`` per column, reducing draw time by roughly 7 × on a 2000-column frame. (:issue:`61532`, :pr:`61764`) .. --------------------------------------------------------------------------- .. _whatsnew_300.bug_fixes: From 308f6a68e3bef8ae5b54c73857bddebfcf3eac57 Mon Sep 17 00:00:00 2001 From: evgenii Date: Sun, 6 Jul 2025 09:56:02 +0300 Subject: [PATCH 06/10] REF: unify _make_plot; single path with LineCollection option --- pandas/plotting/_matplotlib/core.py | 137 +++++++++++++++------------- 1 file changed, 75 insertions(+), 62 deletions(-) diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index 5d2fa8847a210..f18f865d6effb 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -1553,90 +1553,103 @@ def __init__(self, data, **kwargs) -> None: self.data = self.data.fillna(value=0) def _make_plot(self, fig: Figure) -> None: - threshold = 200 # switch when DataFrame has more than this many columns - can_use_lc = ( - not self._is_ts_plot() # not a TS plot - and not self.stacked # stacking not requested - and not com.any_not_none(*self.errors.values()) # no error bars + """ + Draw a DataFrame line plot. For very wide frames (> 200 columns) that are + *not* time-series and have no stacking or error bars, all columns are + rendered with a single LineCollection for a large speed-up while keeping + public behaviour identical to the original per-column path. + """ + # choose once whether to use the LineCollection fast path + threshold = 200 + use_collection = ( + not self._is_ts_plot() + and not self.stacked + and not com.any_not_none(*self.errors.values()) and len(self.data.columns) > threshold ) - if can_use_lc: - ax = self._get_ax(0) - x = self._get_xticks() - segments = [ - np.column_stack((x, self.data[col].values)) for col in self.data.columns - ] - base_colors = mpl.rcParams["axes.prop_cycle"].by_key()["color"] - colors = list(itertools.islice(itertools.cycle(base_colors), len(segments))) - lc = LineCollection( - segments, - colors=colors, - linewidths=self.kwds.get("linewidth", mpl.rcParams["lines.linewidth"]), - ) - ax.add_collection(lc) - ax.margins(0.05) - return # skip the per-column Line2D loop if self._is_ts_plot(): data = maybe_convert_index(self._get_ax(0), self.data) - - x = data.index # dummy, not used + x = data.index # dummy (ignored by _ts_plot) plotf = self._ts_plot it = data.items() else: x = self._get_xticks() - # error: Incompatible types in assignment (expression has type - # "Callable[[Any, Any, Any, Any, Any, Any, KwArg(Any)], Any]", variable has - # type "Callable[[Any, Any, Any, Any, KwArg(Any)], Any]") plotf = self._plot # type: ignore[assignment] - # error: Incompatible types in assignment (expression has type - # "Iterator[tuple[Hashable, ndarray[Any, Any]]]", variable has - # type "Iterable[tuple[Hashable, Series]]") it = self._iter_data(data=self.data) # type: ignore[assignment] + # shared state stacking_id = self._get_stacking_id() is_errorbar = com.any_not_none(*self.errors.values()) - colors = self._get_colors() + segments: list[np.ndarray] = [] # vertices for LineCollection + + # unified per-column loop for i, (label, y) in enumerate(it): - ax = self._get_ax(i) + ax = self._get_ax(i if not use_collection else 0) + kwds = self.kwds.copy() if self.color is not None: kwds["color"] = self.color - style, kwds = self._apply_style_colors( - colors, - kwds, - i, - # error: Argument 4 to "_apply_style_colors" of "MPLPlot" has - # incompatible type "Hashable"; expected "str" - label, # type: ignore[arg-type] - ) - errors = self._get_errorbars(label=label, index=i) - kwds = dict(kwds, **errors) + style, kwds = self._apply_style_colors(colors, kwds, i, label) + kwds.update(self._get_errorbars(label=label, index=i)) + + label_str = self._mark_right_label(pprint_thing(label), index=i) + kwds["label"] = label_str + + if use_collection: + # collect vertices for the final LineCollection + segments.append(np.column_stack((x, y))) + + # keep legend parity with a tiny proxy only if legend is on + if self.legend: + proxy = mpl.lines.Line2D( + [], + [], + color=kwds.get("color"), + linewidth=kwds.get( + "linewidth", mpl.rcParams["lines.linewidth"] + ), + linestyle=kwds.get("linestyle", "-"), + marker=kwds.get("marker", None), + ) + self._append_legend_handles_labels(proxy, label_str) + else: + newlines = plotf( + ax, + x, + y, + style=style, + column_num=i, + stacking_id=stacking_id, + is_errorbar=is_errorbar, + **kwds, + ) + self._append_legend_handles_labels(newlines[0], label_str) - label = pprint_thing(label) - label = self._mark_right_label(label, index=i) - kwds["label"] = label - - newlines = plotf( - ax, - x, - y, - style=style, - column_num=i, - stacking_id=stacking_id, - is_errorbar=is_errorbar, - **kwds, + # reset x-limits for true time-series plots + if self._is_ts_plot(): + lines = get_all_lines(ax) + left, right = get_xlim(lines) + ax.set_xlim(left, right) + + if use_collection and segments: + if self.legend: + lc_colors = [h.get_color() for h in self.legend_handles] + else: + # no legend - just follow the default colour cycle + base = mpl.rcParams["axes.prop_cycle"].by_key()["color"] + lc_colors = list(itertools.islice(itertools.cycle(base), len(segments))) + + lc = LineCollection( + segments, + colors=lc_colors, + linewidths=self.kwds.get("linewidth", mpl.rcParams["lines.linewidth"]), ) - self._append_legend_handles_labels(newlines[0], label) - - if self._is_ts_plot(): - # reset of xlim should be used for ts data - # TODO: GH28021, should find a way to change view limit on xaxis - lines = get_all_lines(ax) - left, right = get_xlim(lines) - ax.set_xlim(left, right) + ax0 = self._get_ax(0) + ax0.add_collection(lc) + ax0.margins(0.05) # error: Signature of "_plot" incompatible with supertype "MPLPlot" @classmethod From 08c0fa912d6c592b23d9e286ad4df798f33d5eff Mon Sep 17 00:00:00 2001 From: evgenii Date: Sun, 6 Jul 2025 10:05:55 +0300 Subject: [PATCH 07/10] DOC: whatsnew entry for LineCollection speed-up --- doc/source/whatsnew/v3.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 238b76dd2039a..44c35fd103870 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -667,7 +667,7 @@ Performance improvements - Performance improvement in :meth:`DataFrame.stack` when using ``future_stack=True`` and the DataFrame does not have a :class:`MultiIndex` (:issue:`58391`) - Performance improvement in :meth:`DataFrame.where` when ``cond`` is a :class:`DataFrame` with many columns (:issue:`61010`) - Performance improvement in :meth:`to_hdf` avoid unnecessary reopenings of the HDF5 file to speedup data addition to files with a very large number of groups . (:issue:`58248`) -- Performance improvement in ``DataFrame.plot(kind="line")``: very wide DataFrames (more than 200 columns) are now rendered with a single :class:`matplotlib.collections.LineCollection` instead of one ``Line2D`` per column, reducing draw time by roughly 7 × on a 2000-column frame. (:issue:`61532`) +- Performance improvement in ``DataFrame.plot(kind="line")``: very wide DataFrames (more than 200 columns) are now rendered with a single :class:`matplotlib.collections.LineCollection` instead of one ``Line2D`` per column, reducing draw time by roughly 5x on a 2000-column frame. (:issue:`61532`) - Performance improvement in ``DataFrameGroupBy.__len__`` and ``SeriesGroupBy.__len__`` (:issue:`57595`) - Performance improvement in indexing operations for string dtypes (:issue:`56997`) - Performance improvement in unary methods on a :class:`RangeIndex` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57825`) From 1a6f47bf6762c933572c2ab9394aa7de6b913ec4 Mon Sep 17 00:00:00 2001 From: evgenii Date: Sun, 6 Jul 2025 12:33:29 +0300 Subject: [PATCH 08/10] TYP: silence mypy warnings in unified _make_plot --- pandas/plotting/_matplotlib/core.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index f18f865d6effb..754b46394a67c 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -1636,7 +1636,10 @@ def _make_plot(self, fig: Figure) -> None: if use_collection and segments: if self.legend: - lc_colors = [h.get_color() for h in self.legend_handles] + lc_colors = [ + cast(mpl.lines.Line2D, h).get_color() # type: ignore[attr-defined] + for h in self.legend_handles + ] else: # no legend - just follow the default colour cycle base = mpl.rcParams["axes.prop_cycle"].by_key()["color"] From 4e266440e11cf18c6237cffc127f9ab84f54958d Mon Sep 17 00:00:00 2001 From: evgenii Date: Sun, 6 Jul 2025 12:54:28 +0300 Subject: [PATCH 09/10] TYP: align ignore hints after line shifts; drop unused ignore --- pandas/plotting/_matplotlib/core.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index 754b46394a67c..8f05bb254e912 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -1592,7 +1592,9 @@ def _make_plot(self, fig: Figure) -> None: if self.color is not None: kwds["color"] = self.color - style, kwds = self._apply_style_colors(colors, kwds, i, label) + style, kwds = self._apply_style_colors( # type: ignore[arg-type] + colors, kwds, i, label + ) kwds.update(self._get_errorbars(label=label, index=i)) label_str = self._mark_right_label(pprint_thing(label), index=i) @@ -1637,8 +1639,7 @@ def _make_plot(self, fig: Figure) -> None: if use_collection and segments: if self.legend: lc_colors = [ - cast(mpl.lines.Line2D, h).get_color() # type: ignore[attr-defined] - for h in self.legend_handles + cast(mpl.lines.Line2D, h).get_color() for h in self.legend_handles ] else: # no legend - just follow the default colour cycle From 3badad12178751522d238e45d456bcb11141eb14 Mon Sep 17 00:00:00 2001 From: evgenii Date: Sun, 6 Jul 2025 13:51:54 +0300 Subject: [PATCH 10/10] MAINT: replace ambiguous space in plotting docstring --- pandas/plotting/_matplotlib/core.py | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index 8f05bb254e912..31090aa5bc80e 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -1559,7 +1559,7 @@ def _make_plot(self, fig: Figure) -> None: rendered with a single LineCollection for a large speed-up while keeping public behaviour identical to the original per-column path. """ - # choose once whether to use the LineCollection fast path + # decide once whether we can use the LineCollection fast draw threshold = 200 use_collection = ( not self._is_ts_plot() @@ -1568,9 +1568,10 @@ def _make_plot(self, fig: Figure) -> None: and len(self.data.columns) > threshold ) + # choose ts-plot helper vs. regular helper if self._is_ts_plot(): data = maybe_convert_index(self._get_ax(0), self.data) - x = data.index # dummy (ignored by _ts_plot) + x = data.index # dummy; _ts_plot ignores it plotf = self._ts_plot it = data.items() else: @@ -1592,8 +1593,11 @@ def _make_plot(self, fig: Figure) -> None: if self.color is not None: kwds["color"] = self.color - style, kwds = self._apply_style_colors( # type: ignore[arg-type] - colors, kwds, i, label + style, kwds = self._apply_style_colors( + colors, + kwds, + i, + label, # type: ignore[arg-type] ) kwds.update(self._get_errorbars(label=label, index=i)) @@ -1601,10 +1605,10 @@ def _make_plot(self, fig: Figure) -> None: kwds["label"] = label_str if use_collection: - # collect vertices for the final LineCollection + # collect vertices; defer drawing segments.append(np.column_stack((x, y))) - # keep legend parity with a tiny proxy only if legend is on + # tiny proxy only if legend is requested if self.legend: proxy = mpl.lines.Line2D( [], @@ -1614,7 +1618,7 @@ def _make_plot(self, fig: Figure) -> None: "linewidth", mpl.rcParams["lines.linewidth"] ), linestyle=kwds.get("linestyle", "-"), - marker=kwds.get("marker", None), + marker=kwds.get("marker"), ) self._append_legend_handles_labels(proxy, label_str) else: @@ -1630,19 +1634,21 @@ def _make_plot(self, fig: Figure) -> None: ) self._append_legend_handles_labels(newlines[0], label_str) - # reset x-limits for true time-series plots + # reset x-limits for true ts plots if self._is_ts_plot(): lines = get_all_lines(ax) left, right = get_xlim(lines) ax.set_xlim(left, right) + # single draw call for fast path if use_collection and segments: if self.legend: lc_colors = [ - cast(mpl.lines.Line2D, h).get_color() for h in self.legend_handles + cast(mpl.lines.Line2D, h).get_color() # mypy: h is Line2D + for h in self.legend_handles ] else: - # no legend - just follow the default colour cycle + # no legend - repeat default colour cycle base = mpl.rcParams["axes.prop_cycle"].by_key()["color"] lc_colors = list(itertools.islice(itertools.cycle(base), len(segments)))