From c4e1c186d7fa0cb2800d6a3ab430ab916a7a1310 Mon Sep 17 00:00:00 2001 From: Niruta Talwekar Date: Tue, 24 Jun 2025 14:10:47 -0700 Subject: [PATCH 1/7] slack link update --- doc/source/development/community.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/development/community.rst b/doc/source/development/community.rst index 1c698d130ea6c..e139ea0376771 100644 --- a/doc/source/development/community.rst +++ b/doc/source/development/community.rst @@ -114,7 +114,7 @@ people who are hesitant to bring up their questions or ideas on a large public mailing list or GitHub. If this sounds like the right place for you, you are welcome to join using -`this link `_! +`this link `_! Please remember to follow our `Code of Conduct `_, and be aware that our admins are monitoring for irrelevant messages and will remove folks who use our From 699a9db4eeba42da51e03cd18f3bb1150250065e Mon Sep 17 00:00:00 2001 From: Niruta Talwekar Date: Mon, 7 Jul 2025 10:15:17 -0700 Subject: [PATCH 2/7] Doc improvement for setitem --- doc/source/user_guide/indexing.rst | 50 +++++++++++++++++++++ pandas/core/frame.py | 72 ++++++++++++++++++++++++++++++ pandas/core/indexing.py | 17 +++++++ pandas/tests/frame/test_api.py | 19 ++++++++ 4 files changed, 158 insertions(+) diff --git a/doc/source/user_guide/indexing.rst b/doc/source/user_guide/indexing.rst index 605f9501c5b23..7cb6dea064476 100644 --- a/doc/source/user_guide/indexing.rst +++ b/doc/source/user_guide/indexing.rst @@ -1732,3 +1732,53 @@ Why does assignment fail when using chained indexing? This means that chained indexing will never work. See :ref:`this section ` for more context. + +.. _indexing.series_assignment: + +Series Assignment and Index Alignment +------------------------------------- + +When assigning a Series to a DataFrame column, pandas performs automatic alignment +based on index labels. This is a fundamental behavior that can be surprising to +new users who might expect positional assignment. 
+ +Key Points: +~~~~~~~~~~~ + +* Series values are matched to DataFrame rows by index label +* Position/order in the Series doesn't matter +* Missing index labels result in NaN values +* This behavior is consistent across ``df[col] = series`` and ``df.loc[:, col] = series`` + +Examples: + +.. ipython:: python + + import pandas as pd + + # Create a DataFrame + df = pd.DataFrame({'values': [1, 2, 3]}, index=['x', 'y', 'z']) + + # Series with matching indices (different order) + s1 = pd.Series([10, 20, 30], index=['z', 'x', 'y']) + df['aligned'] = s1 # Aligns by index, not position + print(df) + + # Series with partial index match + s2 = pd.Series([100, 200], index=['x', 'z']) + df['partial'] = s2 # Missing 'y' gets NaN + print(df) + + # Series with non-matching indices + s3 = pd.Series([1000, 2000], index=['a', 'b']) + df['nomatch'] = s3 # All values become NaN + print(df) + + #Avoiding Confusion: + #If you want positional assignment instead of index alignment: + # Convert Series to array/list for positional assignment + + df['positional'] = s1.values # or s1.tolist() + + # Or reset the Series index to match DataFrame index + df['reset_index'] = s1.reindex(df.index) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 8053c17437c5e..d6e7be522a83e 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4214,6 +4214,78 @@ def isetitem(self, loc, value) -> None: self._iset_item_mgr(loc, arraylike, inplace=False, refs=refs) def __setitem__(self, key, value) -> None: + """ + Set item(s) in DataFrame by key. + + This method allows you to set the values of one or more columns in the + DataFrame using a key. The key can be a single column label, a list of + labels, or a boolean array. If the key does not exist, a new + column will be created. + + Parameters + ---------- + key : str, list of str, or tuple + Column label(s) to set. Can be a single column name, list of column names, + or tuple for MultiIndex columns. 
+ value : scalar, array-like, Series, or DataFrame + Value(s) to set for the specified key(s). + + Returns + ------- + None + This method does not return a value. + + See Also + -------- + DataFrame.loc : Access and set values by label-based indexing. + DataFrame.iloc : Access and set values by position-based indexing. + DataFrame.assign : Assign new columns to a DataFrame. + + Notes + ----- + When assigning a Series to a DataFrame column, pandas aligns the Series + by index labels, not by position. This means: + + * Values from the Series are matched to DataFrame rows by index label + * If a Series index label doesn't exist in the DataFrame index, it's ignored + * If a DataFrame index label doesn't exist in the Series index, NaN is assigned + * The order of values in the Series doesn't matter; only the index labels matter + + Examples + -------- + Basic column assignment: + + >>> df = pd.DataFrame({"A": [1, 2, 3]}) + >>> df["B"] = [4, 5, 6] # Assigns by position + >>> df + A B + 0 1 4 + 1 2 5 + 2 3 6 + + Series assignment with index alignment: + + >>> df = pd.DataFrame({"A": [1, 2, 3]}, index=[0, 1, 2]) + >>> s = pd.Series([10, 20], index=[1, 3]) # Note: index 3 doesn't exist in df + >>> df["B"] = s # Assigns by index label, not position + >>> df + A B + 0 1 NaN + 1 2 10.0 + 2 3 NaN + + Series assignment with partial index match: + + >>> df = pd.DataFrame({"A": [1, 2, 3, 4]}, index=["a", "b", "c", "d"]) + >>> s = pd.Series([100, 200], index=["b", "d"]) + >>> df["B"] = s + >>> df + A B + a 1 NaN + b 2 100 + c 3 NaN + d 4 200 + """ if not PYPY: if sys.getrefcount(self) <= 3: warnings.warn( diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 34a437ba40bd8..53f18f5e2f574 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -610,6 +610,23 @@ def loc(self) -> _LocIndexer: Please see the :ref:`user guide` for more details and explanations of advanced indexing. 
+ + **Assignment with Series** + + When assigning a Series to .loc[row_indexer, col_indexer], pandas aligns + the Series by index labels, not by order or position. This is consistent + with pandas' general alignment behavior. + + Series assignment with .loc and index alignment: + + >>> df = pd.DataFrame({"A": [1, 2, 3]}, index=[0, 1, 2]) + >>> s = pd.Series([10, 20], index=[1, 0]) # Note reversed order + >>> df.loc[:, "B"] = s # Aligns by index, not order + >>> df + A B + 0 1 20 + 1 2 10 + 2 3 NaN """ return _LocIndexer("loc", self) diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py index 2b0bf1b0576f9..f0d00cb13a285 100644 --- a/pandas/tests/frame/test_api.py +++ b/pandas/tests/frame/test_api.py @@ -378,3 +378,22 @@ def test_inspect_getmembers(self): # GH38740 df = DataFrame() inspect.getmembers(df) + + def test_setitem_series_alignment_documentation(self): + # Test that Series assignment aligns by index as documented. + df = DataFrame({"A": [1, 2, 3]}, index=[0, 1, 2]) + s = Series([10, 20], index=[1, 3]) + df["B"] = s + expected = DataFrame({"A": [1, 2, 3], "B": [np.nan, 10, np.nan]}) + tm.assert_frame_equal(df, expected) + + def test_setitem_series_partial_alignment(self): + # Test Series assignment with partial index match. 
""" + df = DataFrame({"A": [1, 2, 3, 4]}, index=["a", "b", "c", "d"]) + s = Series([100, 200], index=["b", "d"]) + df["B"] = s + expected = DataFrame( + {"A": [1, 2, 3, 4], "B": [np.nan, 100, np.nan, 200]}, + index=["a", "b", "c", "d"], + ) + tm.assert_frame_equal(df, expected) From be86001746ebfbc102feeb3263e77040a8429471 Mon Sep 17 00:00:00 2001 From: Niruta Talwekar Date: Mon, 7 Jul 2025 11:33:12 -0700 Subject: [PATCH 3/7] float type conversion --- pandas/core/indexing.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 53f18f5e2f574..3ec59a9dd9f96 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -624,8 +624,8 @@ def loc(self) -> _LocIndexer: >>> df.loc[:, "B"] = s # Aligns by index, not order >>> df A B - 0 1 20 - 1 2 10 + 0 1 20.0 + 1 2 10.0 2 3 NaN """ return _LocIndexer("loc", self) From f792b390afde0ba50ada83d72a9e23dd3eea6086 Mon Sep 17 00:00:00 2001 From: Niruta Talwekar Date: Wed, 9 Jul 2025 21:59:03 -0700 Subject: [PATCH 4/7] remove tests --- pandas/tests/frame/test_api.py | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py index f0d00cb13a285..2b0bf1b0576f9 100644 --- a/pandas/tests/frame/test_api.py +++ b/pandas/tests/frame/test_api.py @@ -378,22 +378,3 @@ def test_inspect_getmembers(self): # GH38740 df = DataFrame() inspect.getmembers(df) - - def test_setitem_series_alignment_documentation(self): - # Test that Series assignment aligns by index as documented. - df = DataFrame({"A": [1, 2, 3]}, index=[0, 1, 2]) - s = Series([10, 20], index=[1, 3]) - df["B"] = s - expected = DataFrame({"A": [1, 2, 3], "B": [np.nan, 10, np.nan]}) - tm.assert_frame_equal(df, expected) - - def test_setitem_series_partial_alignment(self): - # Test Series assignment with partial index match. 
""" - df = DataFrame({"A": [1, 2, 3, 4]}, index=["a", "b", "c", "d"]) - s = Series([100, 200], index=["b", "d"]) - df["B"] = s - expected = DataFrame( - {"A": [1, 2, 3, 4], "B": [np.nan, 100, np.nan, 200]}, - index=["a", "b", "c", "d"], - ) - tm.assert_frame_equal(df, expected) From 0d938a04eaef4620d445fff42f832a1c09f7a554 Mon Sep 17 00:00:00 2001 From: Niruta Talwekar Date: Wed, 9 Jul 2025 22:01:10 -0700 Subject: [PATCH 5/7] remove redundant message --- pandas/core/indexing.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 3ec59a9dd9f96..961429972b169 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -614,8 +614,7 @@ def loc(self) -> _LocIndexer: **Assignment with Series** When assigning a Series to .loc[row_indexer, col_indexer], pandas aligns - the Series by index labels, not by order or position. This is consistent - with pandas' general alignment behavior. + the Series by index labels, not by order or position. 
Series assignment with .loc and index alignment: From ed3b1733f833e587c2688808e93a915911483ae5 Mon Sep 17 00:00:00 2001 From: Niruta Talwekar Date: Wed, 9 Jul 2025 22:03:12 -0700 Subject: [PATCH 6/7] rename df column --- doc/source/user_guide/indexing.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/user_guide/indexing.rst b/doc/source/user_guide/indexing.rst index 7cb6dea064476..bb4c6a006d650 100644 --- a/doc/source/user_guide/indexing.rst +++ b/doc/source/user_guide/indexing.rst @@ -1781,4 +1781,4 @@ Examples: df['positional'] = s1.values # or s1.tolist() # Or reset the Series index to match DataFrame index - df['reset_index'] = s1.reindex(df.index) + df['s1_values'] = s1.set_axis(df.index) From eb9db3cd7ed3e1a933f763a284dad31e27816806 Mon Sep 17 00:00:00 2001 From: Niruta Talwekar Date: Wed, 9 Jul 2025 23:00:54 -0700 Subject: [PATCH 7/7] ignored example --- doc/source/user_guide/indexing.rst | 2 +- pandas/core/frame.py | 16 ++++++++++++++-- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/doc/source/user_guide/indexing.rst b/doc/source/user_guide/indexing.rst index bb4c6a006d650..8582d14811749 100644 --- a/doc/source/user_guide/indexing.rst +++ b/doc/source/user_guide/indexing.rst @@ -1778,7 +1778,7 @@ Examples: #If you want positional assignment instead of index alignment: # Convert Series to array/list for positional assignment - df['positional'] = s1.values # or s1.tolist() + df['positional'] = s1.tolist() # Or reset the Series index to match DataFrame index df['s1_values'] = s1.set_axis(df.index) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index d6e7be522a83e..9b05dda6500c4 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4285,6 +4285,18 @@ def __setitem__(self, key, value) -> None: - b 2 100 + b 2 100.0 c 3 NaN - d 4 200 + d 4 200.0 + + Series index labels NOT in DataFrame, ignored: + + >>> df = pd.DataFrame({"A": [1, 2, 3]}, index=["x", "y", "z"]) + >>> s = pd.Series([10, 20, 30, 40, 50], index=["x", "y", "a", "b", 
"z"]) + >>> # Labels 'a' and 'b' are not in df.index, so their values are ignored. + >>> df["B"] = s + >>> df + A B + x 1 10 + y 2 20 + z 3 50 """ if not PYPY: if sys.getrefcount(self) <= 3: