From 171c02b346665141ff889efd369296b3b496700f Mon Sep 17 00:00:00 2001 From: Rob <124158982+rob-sil@users.noreply.github.com> Date: Wed, 6 Dec 2023 11:49:23 -0600 Subject: [PATCH 1/5] Keep series names when not ignoring them --- pandas/core/reshape/concat.py | 22 ++++++++++++---------- pandas/tests/reshape/concat/test_concat.py | 10 +++++++++- 2 files changed, 21 insertions(+), 11 deletions(-) diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index 1bc548de91f01..d46348fff7a02 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -464,7 +464,7 @@ def __init__( # if we have mixed ndims, then convert to highest ndim # creating column numbers as needed if len(ndims) > 1: - objs, sample = self._sanitize_mixed_ndim(objs, sample, ignore_index, axis) + objs = self._sanitize_mixed_ndim(objs, sample, ignore_index, axis) self.objs = objs @@ -580,7 +580,7 @@ def _sanitize_mixed_ndim( sample: Series | DataFrame, ignore_index: bool, axis: AxisInt, - ) -> tuple[list[Series | DataFrame], Series | DataFrame]: + ) -> list[Series | DataFrame]: # if we have mixed ndims, then convert to highest ndim # creating column numbers as needed @@ -601,19 +601,21 @@ def _sanitize_mixed_ndim( else: name = getattr(obj, "name", None) if ignore_index or name is None: - name = current_column - current_column += 1 - - # doing a row-wise concatenation so need everything - # to line up - if self._is_frame and axis == 1: - name = 0 + if axis == 1: + # doing a row-wise concatenation so need everything + # to line up + name = 0 + else: + # doing a column-wise concatenation so need series + # to have unique names + name = current_column + current_column += 1 obj = sample._constructor({name: obj}, copy=False) new_objs.append(obj) - return new_objs, sample + return new_objs def get_result(self): cons: Callable[..., DataFrame | Series] diff --git a/pandas/tests/reshape/concat/test_concat.py b/pandas/tests/reshape/concat/test_concat.py index ea0d510d2b8f8..407e3f56e3536 100644 --- a/pandas/tests/reshape/concat/test_concat.py +++ b/pandas/tests/reshape/concat/test_concat.py @@ -326,11 +326,19 @@ def test_concat_mixed_objs(self): # axis 0 expected = DataFrame( - np.tile(arr, 3).reshape(-1, 1), index=index.tolist() * 3, columns=[0] + np.kron(np.where(np.identity(3) == 1, 1, np.nan), arr).T, + index=index.to_list() * 3, + columns=["foo", 0, "bar"], ) result = concat([s1, df, s2]) tm.assert_frame_equal(result, expected) + expected = DataFrame( + np.tile(arr, 3).reshape(-1, 1), index=index.to_list() * 3, columns=[0] + ) + result = concat([s1.rename(0), df, s2.rename(0)]) + tm.assert_frame_equal(result, expected) + expected = DataFrame(np.tile(arr, 3).reshape(-1, 1), columns=[0]) result = concat([s1, df, s2], ignore_index=True) tm.assert_frame_equal(result, expected) From 4ba36a50e66acc3307f9b56e7372018d8a0cc03c Mon Sep 17 00:00:00 2001 From: Rob <124158982+rob-sil@users.noreply.github.com> Date: Wed, 6 Dec 2023 11:56:58 -0600 Subject: [PATCH 2/5] Split test into two shorter tests --- pandas/tests/reshape/concat/test_concat.py | 31 +++++++++++++++------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/pandas/tests/reshape/concat/test_concat.py b/pandas/tests/reshape/concat/test_concat.py index 407e3f56e3536..d13ad300c2ca1 100644 --- a/pandas/tests/reshape/concat/test_concat.py +++ b/pandas/tests/reshape/concat/test_concat.py @@ -267,11 +267,10 @@ def test_with_mixed_tuples(self, sort): # it works concat([df1, df2], sort=sort) - def test_concat_mixed_objs(self): - # concat mixed series/frames + def test_concat_mixed_objs_columns(self): + # Test column-wise concat for mixed series/frames (axis=1) # G2385 - # axis 1 index = date_range("01-Jan-2013", periods=10, freq="h") arr = np.arange(10, dtype="int64") s1 = Series(arr, index=index) @@ -324,23 +323,35 @@ def test_concat_mixed_objs(self): result = concat([s1, df, s2], axis=1, ignore_index=True) tm.assert_frame_equal(result, expected) - # axis 0 + def test_concat_mixed_objs_index(self): + # Test row-wise concat for mixed series/frames (axis=0) + # GH2385, GH15047 + + index = date_range("01-Jan-2013", periods=10, freq="h") + arr = np.arange(10, dtype="int64") + s1 = Series(arr, index=index) + s2 = Series(arr, index=index) + df = DataFrame(arr.reshape(-1, 1), index=index) + + # Align series names to column names expected = DataFrame( - np.kron(np.where(np.identity(3) == 1, 1, np.nan), arr).T, - index=index.to_list() * 3, - columns=["foo", 0, "bar"], + np.tile(arr, 3).reshape(-1, 1), index=index.to_list() * 3, columns=[0] ) result = concat([s1, df, s2]) tm.assert_frame_equal(result, expected) + # Separate columns for series not appearing as column names expected = DataFrame( - np.tile(arr, 3).reshape(-1, 1), index=index.to_list() * 3, columns=[0] + np.kron(np.where(np.identity(3) == 1, 1, np.nan), arr).T, + index=index.to_list() * 3, + columns=["foo", 0, "bar"], ) - result = concat([s1.rename(0), df, s2.rename(0)]) + result = concat([s1.rename("foo"), df, s2.rename("bar")]) tm.assert_frame_equal(result, expected) + # Rename all series to 0 when ignore_index=True expected = DataFrame(np.tile(arr, 3).reshape(-1, 1), columns=[0]) - result = concat([s1, df, s2], ignore_index=True) + result = concat([s1.rename("foo"), df, s2.rename("bar")], ignore_index=True) tm.assert_frame_equal(result, expected) def test_dtype_coercion(self): From 4a9f30d659ec94a37cb141642e8820e2d7bec129 Mon Sep 17 00:00:00 2001 From: Rob <124158982+rob-sil@users.noreply.github.com> Date: Wed, 6 Dec 2023 12:01:30 -0600 Subject: [PATCH 3/5] whatsnew --- doc/source/whatsnew/v2.2.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index dea986c401b60..52a354b8e54bd 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -636,6 +636,7 @@ Groupby/resample/rolling Reshaping ^^^^^^^^^ - Bug in :func:`concat` ignoring ``sort`` parameter when passed :class:`DatetimeIndex` indexes (:issue:`54769`) +- Bug in :func:`concat` renaming :class:`Series` when ``ignore_index=False`` (:issue:`15047`) - Bug in :func:`merge_asof` raising ``TypeError`` when ``by`` dtype is not ``object``, ``int64``, or ``uint64`` (:issue:`22794`) - Bug in :func:`merge` returning columns in incorrect order when left and/or right is empty (:issue:`51929`) - Bug in :meth:`DataFrame.melt` where an exception was raised if ``var_name`` was not a string (:issue:`55948`) From 1ba58df8fa6c98ec353d3bf1bcb9ac2b1e4e7706 Mon Sep 17 00:00:00 2001 From: Rob <124158982+rob-sil@users.noreply.github.com> Date: Wed, 6 Dec 2023 12:09:32 -0600 Subject: [PATCH 4/5] tolist --- pandas/tests/reshape/concat/test_concat.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/reshape/concat/test_concat.py b/pandas/tests/reshape/concat/test_concat.py index d13ad300c2ca1..fcd992334da3f 100644 --- a/pandas/tests/reshape/concat/test_concat.py +++ b/pandas/tests/reshape/concat/test_concat.py @@ -335,7 +335,7 @@ def test_concat_mixed_objs_index(self): # Align series names to column names expected = DataFrame( - np.tile(arr, 3).reshape(-1, 1), index=index.to_list() * 3, columns=[0] + np.tile(arr, 3).reshape(-1, 1), index=index.tolist() * 3, columns=[0] ) result = concat([s1, df, s2]) tm.assert_frame_equal(result, expected) @@ -343,7 +343,7 @@ def test_concat_mixed_objs_index(self): # Separate columns for series not appearing as column names expected = DataFrame( np.kron(np.where(np.identity(3) == 1, 1, np.nan), arr).T, - index=index.to_list() * 3, + index=index.tolist() * 3, columns=["foo", 0, "bar"], ) result = concat([s1.rename("foo"), df, s2.rename("bar")]) From c16f632105ed8fb54439d78b5830bf72faaff273 Mon Sep 17 00:00:00 2001 From: Rob <124158982+rob-sil@users.noreply.github.com> Date: Thu, 7 Dec 2023 13:01:41 -0600 Subject: [PATCH 5/5] Split test for concat on index --- pandas/tests/reshape/concat/test_concat.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/pandas/tests/reshape/concat/test_concat.py b/pandas/tests/reshape/concat/test_concat.py index fcd992334da3f..9e34d02091e69 100644 --- a/pandas/tests/reshape/concat/test_concat.py +++ b/pandas/tests/reshape/concat/test_concat.py @@ -324,7 +324,7 @@ def test_concat_mixed_objs_columns(self): tm.assert_frame_equal(result, expected) def test_concat_mixed_objs_index(self): - # Test row-wise concat for mixed series/frames (axis=0) + # Test row-wise concat for mixed series/frames with a common name # GH2385, GH15047 index = date_range("01-Jan-2013", periods=10, freq="h") @@ -333,25 +333,33 @@ def test_concat_mixed_objs_index(self): s2 = Series(arr, index=index) df = DataFrame(arr.reshape(-1, 1), index=index) - # Align series names to column names expected = DataFrame( np.tile(arr, 3).reshape(-1, 1), index=index.tolist() * 3, columns=[0] ) result = concat([s1, df, s2]) tm.assert_frame_equal(result, expected) - # Separate columns for series not appearing as column names + def test_concat_mixed_objs_index_names(self): + # Test row-wise concat for mixed series/frames with distinct names + # GH2385, GH15047 + + index = date_range("01-Jan-2013", periods=10, freq="h") + arr = np.arange(10, dtype="int64") + s1 = Series(arr, index=index, name="foo") + s2 = Series(arr, index=index, name="bar") + df = DataFrame(arr.reshape(-1, 1), index=index) + expected = DataFrame( np.kron(np.where(np.identity(3) == 1, 1, np.nan), arr).T, index=index.tolist() * 3, columns=["foo", 0, "bar"], ) - result = concat([s1.rename("foo"), df, s2.rename("bar")]) + result = concat([s1, df, s2]) tm.assert_frame_equal(result, expected) # Rename all series to 0 when ignore_index=True expected = DataFrame(np.tile(arr, 3).reshape(-1, 1), columns=[0]) - result = concat([s1.rename("foo"), df, s2.rename("bar")], ignore_index=True) + result = concat([s1, df, s2], ignore_index=True) tm.assert_frame_equal(result, expected) def test_dtype_coercion(self):