diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index a9841c4475822..e93dedbcf354a 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -940,6 +940,7 @@ Other - Bug in Dataframe Interchange Protocol implementation was returning incorrect results for data buffers' associated dtype, for string and datetime columns (:issue:`54781`) - Bug in ``Series.list`` methods not preserving the original :class:`Index`. (:issue:`58425`) - Bug in ``Series.list`` methods not preserving the original name. (:issue:`60522`) +- Bug in :meth:`Series.replace` failing when the :class:`Series` was created from an :class:`Index` and Copy-On-Write is enabled (:issue:`61622`) - Bug in printing a :class:`DataFrame` with a :class:`DataFrame` stored in :attr:`DataFrame.attrs` raised a ``ValueError`` (:issue:`60455`) - Bug in printing a :class:`Series` with a :class:`DataFrame` stored in :attr:`Series.attrs` raised a ``ValueError`` (:issue:`60568`) - Fixed bug where the :class:`DataFrame` constructor misclassified array-like objects with a ``.name`` attribute as :class:`Series` or :class:`Index` (:issue:`61443`) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 6aa5062b8ed86..6aaf0b4abea1c 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -10,7 +10,6 @@ final, ) import warnings -import weakref import numpy as np @@ -863,14 +862,22 @@ def replace_list( ) if i != src_len: - # This is ugly, but we have to get rid of intermediate refs - # that did not go out of scope yet, otherwise we will trigger - # many unnecessary copies + # This is ugly, but we have to get rid of intermediate refs. 
We + # can simply clear the referenced_blocks if we already copied, + # otherwise we have to remove ourselves + self_blk_ids = { + id(b()): i for i, b in enumerate(self.refs.referenced_blocks) + } for b in result: - ref = weakref.ref(b) - b.refs.referenced_blocks.pop( - b.refs.referenced_blocks.index(ref) - ) + if b.refs is self.refs: + # We are still sharing memory with self + if id(b) in self_blk_ids: + # Remove ourselves from the refs; we are temporary + self.refs.referenced_blocks.pop(self_blk_ids[id(b)]) + else: + # We have already copied, so we can clear the refs to avoid + # future copies + b.refs.referenced_blocks.clear() new_rb.extend(result) rb = new_rb return rb diff --git a/pandas/tests/series/methods/test_replace.py b/pandas/tests/series/methods/test_replace.py index abd5d075ea3d5..7d068c2120735 100644 --- a/pandas/tests/series/methods/test_replace.py +++ b/pandas/tests/series/methods/test_replace.py @@ -3,6 +3,8 @@ import numpy as np import pytest +import pandas.util._test_decorators as td + import pandas as pd import pandas._testing as tm from pandas.core.arrays import IntervalArray @@ -715,3 +717,12 @@ def test_replace_all_NA(self): result = df.replace({r"^#": "$"}, regex=True) expected = pd.Series([pd.NA, pd.NA]) tm.assert_series_equal(result, expected) + + +@td.skip_if_no("pyarrow") +def test_replace_from_index(): + # https://github.com/pandas-dev/pandas/issues/61622 + idx = pd.Index(["a", "b", "c"], dtype="string[pyarrow]") + expected = pd.Series(["d", "b", "c"], dtype="string[pyarrow]") + result = pd.Series(idx).replace({"z": "b", "a": "d"}) + tm.assert_series_equal(result, expected)