pandas-dev · Liam3851 · Jul 3, 2025 · Jul 3, 2025 · Jul 3, 2025 · Jul 3, 2025
diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py
@@ -72,6 +72,8 @@
 from pandas.io.formats import printing
 
 if TYPE_CHECKING:
+    from collections.abc import MutableMapping
+
     import pyarrow
 
     from pandas._typing import (
@@ -218,6 +220,12 @@ def __eq__(self, other: object) -> bool:
             return self.storage == other.storage and self.na_value is other.na_value
         return False
 
+    def __setstate__(self, state: MutableMapping[str, Any]) -> None:
+        # back-compat for pandas < 2.3, where na_value did not yet exist:
+        # keys missing from an older pickled state fall back to defaults
+        self.storage = state.pop("storage", "python")
+        self._na_value = state.pop("_na_value", libmissing.NA)
+
     def __hash__(self) -> int:
         # need to override __hash__ as well because of overriding __eq__
         return super().__hash__()

diff --git a/pandas/tests/io/data/legacy_pickle/1.5.3/1.5.3_x86_64_win_3.11.13.pickle b/pandas/tests/io/data/legacy_pickle/1.5.3/1.5.3_x86_64_win_3.11.13.pickle
diff --git a/pandas/tests/io/generate_legacy_storage_files.py b/pandas/tests/io/generate_legacy_storage_files.py
@@ -147,6 +147,7 @@ def create_pickle_data():
         "float": Index(np.arange(10, dtype=np.float64)),
         "uint": Index(np.arange(10, dtype=np.uint64)),
         "timedelta": timedelta_range("00:00:00", freq="30min", periods=10),
+        "string": Index(["foo", "bar", "baz", "qux", "quux"], dtype="string"),
     }
 
     index["range"] = RangeIndex(10)
@@ -185,6 +186,7 @@ def create_pickle_data():
         "dt": Series(date_range("20130101", periods=5)),
         "dt_tz": Series(date_range("20130101", periods=5, tz="US/Eastern")),
         "period": Series([Period("2000Q1")] * 5),
+        "string": Series(["foo", "bar", "baz", "qux", "quux"], dtype="string"),
     }
 
     mixed_dup_df = DataFrame(data)
@@ -233,6 +235,12 @@ def create_pickle_data():
             },
             index=range(5),
         ),
+        "string": DataFrame(
+            {
+                "A": Series(["foo", "bar", "baz", "qux", "quux"], dtype="string"),
+                "B": Series(["one", "two", "one", "two", "three"], dtype="string"),
+            }
+        ),
     }
 
     cat = {