From 821def9085d5374e2674329ea97fbfd3afa6dfa9 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Wed, 16 Jul 2025 22:45:19 +0000 Subject: [PATCH 1/2] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20in?= =?UTF-8?q?itial=20version?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Created using spr 1.3.5 --- .../Transforms/Vectorize/SLPVectorizer.cpp | 6 +++ .../X86/matched-nodes-updated.ll | 44 ++++++++----------- 2 files changed, 25 insertions(+), 25 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index da6af353c709f..9446e16532115 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -11693,6 +11693,7 @@ void BoUpSLP::transformNodes() { if (StartIdx + VF > End) continue; SmallVector> Slices; + bool AllStrided = true; for (unsigned Cnt = StartIdx; Cnt + VF <= End; Cnt += VF) { ArrayRef Slice = VL.slice(Cnt, VF); // If any instruction is vectorized already - do not try again. @@ -11743,6 +11744,9 @@ void BoUpSLP::transformNodes() { SmallVector PointerOps; LoadsState Res = canVectorizeLoads(Slice, Slice.front(), Order, PointerOps); + AllStrided &= Res == LoadsState::StridedVectorize || + Res == LoadsState::ScatterVectorize || + Res == LoadsState::Gather; // Do not vectorize gathers. if (Res == LoadsState::ScatterVectorize || Res == LoadsState::Gather) { @@ -11772,6 +11776,8 @@ void BoUpSLP::transformNodes() { } Slices.emplace_back(Cnt, Slice.size()); } + if (VF == 2 && AllStrided && Slices.size() > 2) + continue; auto AddCombinedNode = [&](unsigned Idx, unsigned Cnt, unsigned Sz) { E.CombinedEntriesWithIndices.emplace_back(Idx, Cnt); if (StartIdx == Cnt) diff --git a/llvm/test/Transforms/SLPVectorizer/X86/matched-nodes-updated.ll b/llvm/test/Transforms/SLPVectorizer/X86/matched-nodes-updated.ll index f56af934f19f5..b1864b43512d8 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/matched-nodes-updated.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/matched-nodes-updated.ll @@ -14,50 +14,44 @@ define i32 @test(i32 %s.0) { ; CHECK: [[IF_END3:.*]]: ; CHECK-NEXT: br label %[[IF_END6:.*]] ; CHECK: [[IF_END6]]: -; CHECK-NEXT: [[J_4:%.*]] = phi i32 [ 0, %[[IF_END3]] ], [ [[TMP28:%.*]], %[[O]] ] -; CHECK-NEXT: [[TMP2:%.*]] = phi <2 x i32> [ poison, %[[IF_END3]] ], [ zeroinitializer, %[[O]] ] -; CHECK-NEXT: [[TMP3:%.*]] = phi <2 x i32> [ poison, %[[IF_END3]] ], [ zeroinitializer, %[[O]] ] -; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x i32> [ poison, %[[IF_END3]] ], [ [[TMP22:%.*]], %[[O]] ] +; CHECK-NEXT: [[TMP2:%.*]] = phi <2 x i32> [ poison, %[[IF_END3]] ], [ [[TMP24:%.*]], %[[O]] ] +; CHECK-NEXT: [[TMP3:%.*]] = phi <4 x i32> [ poison, %[[IF_END3]] ], [ zeroinitializer, %[[O]] ] +; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x i32> [ poison, %[[IF_END3]] ], [ [[TMP29:%.*]], %[[O]] ] ; CHECK-NEXT: [[TMP5:%.*]] = xor <2 x i32> [[TMP4]], zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP22]], <2 x i32> poison, <8 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP22]], <2 x i32> poison, <8 x i32> -; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x i32> , <8 x i32> [[TMP7]], <8 x i32> -; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <8 x i32> -; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <8 x i32> [[TMP8]], <8 x i32> [[TMP9]], <8 x i32> -; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> poison, <8 x i32> -; CHECK-NEXT: [[TMP27:%.*]] = shufflevector <8 x i32> [[TMP10]], <8 x i32> [[TMP11]], <8 x i32> -; CHECK-NEXT: [[TMP30:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> , i32 [[TMP22:%.*]], i32 1 +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP27:%.*]] = shufflevector <8 x i32> [[TMP6]], <8 x i32> [[TMP7]], <8 x i32> +; CHECK-NEXT: [[TMP30:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <8 x i32> ; CHECK-NEXT: [[TMP31:%.*]] = shufflevector <8 x i32> [[TMP27]], <8 x i32> [[TMP30]], <8 x i32> ; CHECK-NEXT: br i1 false, label %[[IF_END24:.*]], label %[[IF_THEN11:.*]] ; CHECK: [[IF_THEN11]]: -; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> poison, <8 x i32> -; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> poison, <8 x i32> -; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <8 x i32> [[TMP13]], <8 x i32> , <8 x i32> -; CHECK-NEXT: [[TMP15:%.*]] = insertelement <8 x i32> poison, i32 [[J_4]], i32 0 -; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <8 x i32> [[TMP15]], <8 x i32> poison, <8 x i32> -; CHECK-NEXT: [[TMP29:%.*]] = shufflevector <8 x i32> [[TMP14]], <8 x i32> [[TMP16]], <8 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <8 x i32> , <8 x i32> [[TMP11]], <8 x i32> ; CHECK-NEXT: br label %[[IF_END24]] ; CHECK: [[IF_THEN18:.*]]: ; CHECK-NEXT: br label %[[T]] ; CHECK: [[T]]: -; CHECK-NEXT: [[TMP34:%.*]] = phi <8 x i32> [ [[TMP33:%.*]], %[[O]] ], [ poison, %[[IF_THEN18]] ] +; CHECK-NEXT: [[TMP13:%.*]] = phi <8 x i32> [ [[TMP33:%.*]], %[[O]] ], [ poison, %[[IF_THEN18]] ] ; CHECK-NEXT: [[TMP17]] = extractelement <4 x i32> [[TMP23:%.*]], i32 0 ; CHECK-NEXT: br i1 false, label %[[IF_END24]], label %[[K]] ; CHECK: [[IF_END24]]: -; CHECK-NEXT: [[TMP18:%.*]] = phi <8 x i32> [ [[TMP29]], %[[IF_THEN11]] ], [ [[TMP31]], %[[IF_END6]] ], [ [[TMP34]], %[[T]] ] -; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <8 x i32> [[TMP18]], <8 x i32> poison, <2 x i32> +; CHECK-NEXT: [[TMP18:%.*]] = phi <8 x i32> [ [[TMP12]], %[[IF_THEN11]] ], [ [[TMP31]], %[[IF_END6]] ], [ [[TMP13]], %[[T]] ] ; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <8 x i32> [[TMP18]], <8 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <8 x i32> [[TMP18]], <8 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <8 x i32> [[TMP18]], <8 x i32> poison, <2 x i32> +; CHECK-NEXT: [[TMP34:%.*]] = shufflevector <8 x i32> [[TMP18]], <8 x i32> poison, <4 x i32> ; CHECK-NEXT: br label %[[O]] ; CHECK: [[O]]: -; CHECK-NEXT: [[TMP22]] = phi <2 x i32> [ zeroinitializer, %[[K]] ], [ [[TMP19]], %[[IF_END24]] ] ; CHECK-NEXT: [[TMP23]] = phi <4 x i32> [ [[TMP1]], %[[K]] ], [ [[TMP20]], %[[IF_END24]] ] -; CHECK-NEXT: [[TMP24:%.*]] = phi <4 x i32> [ zeroinitializer, %[[K]] ], [ [[TMP21]], %[[IF_END24]] ] +; CHECK-NEXT: [[TMP24]] = phi <2 x i32> [ zeroinitializer, %[[K]] ], [ [[TMP19]], %[[IF_END24]] ] +; CHECK-NEXT: [[TMP21:%.*]] = phi <4 x i32> [ zeroinitializer, %[[K]] ], [ [[TMP34]], %[[IF_END24]] ] +; CHECK-NEXT: [[TMP22]] = extractelement <2 x i32> [[TMP24]], i32 1 ; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <4 x i32> [[TMP23]], <4 x i32> poison, <8 x i32> ; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <8 x i32> [[TMP25]], <8 x i32> , <8 x i32> ; CHECK-NEXT: [[TMP32:%.*]] = shufflevector <4 x i32> [[TMP23]], <4 x i32> poison, <8 x i32> ; CHECK-NEXT: [[TMP33]] = shufflevector <8 x i32> [[TMP26]], <8 x i32> [[TMP32]], <8 x i32> -; CHECK-NEXT: [[TMP28]] = extractelement <4 x i32> [[TMP24]], i32 3 +; CHECK-NEXT: [[TMP35:%.*]] = shufflevector <2 x i32> [[TMP24]], <2 x i32> poison, <2 x i32> +; CHECK-NEXT: [[TMP28:%.*]] = shufflevector <4 x i32> [[TMP21]], <4 x i32> poison, <2 x i32> +; CHECK-NEXT: [[TMP29]] = shufflevector <2 x i32> [[TMP35]], <2 x i32> [[TMP28]], <2 x i32> ; CHECK-NEXT: br i1 false, label %[[T]], label %[[IF_END6]] ; entry: From c911b47b2538ae6ec3269425876198dcc982c0dd Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Thu, 17 Jul 2025 11:59:36 +0000 Subject: [PATCH 2/2] address comments Created using spr 1.3.5 --- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 9446e16532115..6ad5c60105a28 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -11776,6 +11776,9 @@ void BoUpSLP::transformNodes() { } Slices.emplace_back(Cnt, Slice.size()); } + // Do not try to vectorize if all slides are strided or gathered with + // vector factor 2 and there are more than 2 slices. Better to handle + // them in gathered loads analysis, may result in better vectorization. if (VF == 2 && AllStrided && Slices.size() > 2) continue; auto AddCombinedNode = [&](unsigned Idx, unsigned Cnt, unsigned Sz) {