diff --git a/doc/source/whatsnew/v2.1.1.rst b/doc/source/whatsnew/v2.1.1.rst index f5aa0968ae362..1ef60960a51c3 100644 --- a/doc/source/whatsnew/v2.1.1.rst +++ b/doc/source/whatsnew/v2.1.1.rst @@ -13,6 +13,7 @@ including other versions of pandas. Fixed regressions ~~~~~~~~~~~~~~~~~ +- Fixed regression in :func:`read_csv` when ``usecols`` is given and ``dtype`` is a dict for ``engine="python"`` (:issue:`54868`) - Fixed regression in :meth:`DataFrame.__setitem__` raising ``AssertionError`` when setting a :class:`Series` with a partial :class:`MultiIndex` (:issue:`54875`) .. --------------------------------------------------------------------------- diff --git a/pandas/io/parsers/python_parser.py b/pandas/io/parsers/python_parser.py index 520d2193e1c04..6846ea2b196b8 100644 --- a/pandas/io/parsers/python_parser.py +++ b/pandas/io/parsers/python_parser.py @@ -1176,17 +1176,17 @@ def _set_no_thousand_columns(self) -> set[int]: ) if self.columns and self.dtype: assert self._col_indices is not None - for i in self._col_indices: + for i, col in zip(self._col_indices, self.columns): if not isinstance(self.dtype, dict) and not is_numeric_dtype( self.dtype ): no_thousands_columns.add(i) if ( isinstance(self.dtype, dict) - and self.columns[i] in self.dtype + and col in self.dtype and ( - not is_numeric_dtype(self.dtype[self.columns[i]]) - or is_bool_dtype(self.dtype[self.columns[i]]) + not is_numeric_dtype(self.dtype[col]) + or is_bool_dtype(self.dtype[col]) ) ): no_thousands_columns.add(i) diff --git a/pandas/tests/io/parser/dtypes/test_dtypes_basic.py b/pandas/tests/io/parser/dtypes/test_dtypes_basic.py index a578b2a402e93..97a32ad79a67c 100644 --- a/pandas/tests/io/parser/dtypes/test_dtypes_basic.py +++ b/pandas/tests/io/parser/dtypes/test_dtypes_basic.py @@ -575,3 +575,20 @@ def test_accurate_parsing_of_large_integers(all_parsers): assert len(orders.loc[orders["ID_DEAL"] == 2023552585717263359, "ID_DEAL"]) == 1 assert len(orders.loc[orders["ID_DEAL"] == 
2023552585717263360, "ID_DEAL"]) == 2 assert len(orders.loc[orders["ID_DEAL"] == 2023552585717263361, "ID_DEAL"]) == 2 + + +def test_dtypes_with_usecols(all_parsers): + # GH#54868 + + parser = all_parsers + data = """a,b,c +1,2,3 +4,5,6""" + + result = parser.read_csv(StringIO(data), usecols=["a", "c"], dtype={"a": object}) + if parser.engine == "pyarrow": + values = [1, 4] + else: + values = ["1", "4"] + expected = DataFrame({"a": pd.Series(values, dtype=object), "c": [3, 6]}) + tm.assert_frame_equal(result, expected)