pandas-dev
diff --git a/doc/source/whatsnew/v3.0.0.rst
Lines changed: 1 addition & 0 deletions b/doc/source/whatsnew/v3.0.0.rst
Lines changed: 1 addition & 0 deletions
diff --git a/pandas/io/stata.py
Lines changed: 31 additions & 6 deletions b/pandas/io/stata.py
Lines changed: 31 additions & 6 deletions
diff --git a/pandas/tests/io/data/stata/stata1_108.dta
703 Bytes b/pandas/tests/io/data/stata/stata1_108.dta
703 Bytes
diff --git a/pandas/tests/io/data/stata/stata1_110.dta
945 Bytes b/pandas/tests/io/data/stata/stata1_110.dta
945 Bytes
diff --git a/pandas/tests/io/data/stata/stata1_111.dta
945 Bytes b/pandas/tests/io/data/stata/stata1_111.dta
945 Bytes
diff --git a/pandas/tests/io/data/stata/stata1_113.dta
945 Bytes b/pandas/tests/io/data/stata/stata1_113.dta
945 Bytes
diff --git a/pandas/tests/io/data/stata/stata1_115.dta
1.1 KB b/pandas/tests/io/data/stata/stata1_115.dta
1.1 KB
diff --git a/pandas/tests/io/data/stata/stata1_118.dta
3.69 KB b/pandas/tests/io/data/stata/stata1_118.dta
3.69 KB
diff --git a/pandas/tests/io/data/stata/stata1_119.dta
3.7 KB b/pandas/tests/io/data/stata/stata1_119.dta
3.7 KB
diff --git a/pandas/tests/io/data/stata/stata8_108.dta
703 Bytes b/pandas/tests/io/data/stata/stata8_108.dta
703 Bytes
@@ -583,6 +583,7 @@ I/O
 - Bug in :meth:`read_excel` raising ``ValueError`` when passing array of boolean values when ``dtype="boolean"``. (:issue:`58159`)
 - Bug in :meth:`read_json` not validating the ``typ`` argument to not be exactly ``"frame"`` or ``"series"`` (:issue:`59124`)
 - Bug in :meth:`read_stata` raising ``KeyError`` when input file is stored in big-endian format and contains strL data. (:issue:`58638`)
+- Bug in :meth:`read_stata` where integer values at the extremes of their storage type's range were incorrectly interpreted as missing for format versions 111 and prior (:issue:`58130`)
 
 Period
 ^^^^^^
 
@@ -983,6 +983,19 @@ def __init__(self) -> None:
                 np.float64(struct.unpack("<d", float64_max)[0]),
             ),
         }
+        self.OLD_VALID_RANGE = {
+            "b": (-128, 126),
+            "h": (-32768, 32766),
+            "l": (-2147483648, 2147483646),
+            "f": (
+                np.float32(struct.unpack("<f", float32_min)[0]),
+                np.float32(struct.unpack("<f", float32_max)[0]),
+            ),
+            "d": (
+                np.float64(struct.unpack("<d", float64_min)[0]),
+                np.float64(struct.unpack("<d", float64_max)[0]),
+            ),
+        }
 
         self.OLD_TYPE_MAPPING = {
             98: 251,  # byte
@@ -994,7 +1007,7 @@ def __init__(self) -> None:
 
         # These missing values are the generic '.' in Stata, and are used
         # to replace nans
-        self.MISSING_VALUES = {
+        self.MISSING_VALUES: dict[str, int | np.float32 | np.float64] = {
             "b": 101,
             "h": 32741,
             "l": 2147483621,
@@ -1808,11 +1821,18 @@ def _do_convert_missing(self, data: DataFrame, convert_missing: bool) -> DataFra
         replacements = {}
         for i in range(len(data.columns)):
             fmt = self._typlist[i]
-            if fmt not in self.VALID_RANGE:
-                continue
+            if self._format_version <= 111:
+                if fmt not in self.OLD_VALID_RANGE:
+                    continue
 
-            fmt = cast(str, fmt)  # only strs in VALID_RANGE
-            nmin, nmax = self.VALID_RANGE[fmt]
+                fmt = cast(str, fmt)  # only strs in OLD_VALID_RANGE
+                nmin, nmax = self.OLD_VALID_RANGE[fmt]
+            else:
+                if fmt not in self.VALID_RANGE:
+                    continue
+
+                fmt = cast(str, fmt)  # only strs in VALID_RANGE
+                nmin, nmax = self.VALID_RANGE[fmt]
             series = data.iloc[:, i]
 
             # appreciably faster to do this with ndarray instead of Series
@@ -1827,7 +1847,12 @@ def _do_convert_missing(self, data: DataFrame, convert_missing: bool) -> DataFra
                 umissing, umissing_loc = np.unique(series[missing], return_inverse=True)
                 replacement = Series(series, dtype=object)
                 for j, um in enumerate(umissing):
-                    missing_value = StataMissingValue(um)
+                    if self._format_version <= 111:
+                        missing_value = StataMissingValue(
+                            float(self.MISSING_VALUES[fmt])
+                        )
+                    else:
+                        missing_value = StataMissingValue(um)
 
                     loc = missing_loc[umissing_loc == j]
                     replacement.iloc[loc] = missing_value