From c4b205c93a6403316bcf94a27a7a44b5e8861bcd Mon Sep 17 00:00:00 2001 From: william Date: Sun, 13 Jul 2025 21:53:59 +0800 Subject: [PATCH 1/2] X86: Remove LowerToHorizontalOp and modified test case --- llvm/lib/Target/X86/X86ISelLowering.cpp | 118 ---- .../PhaseOrdering}/X86/haddsub-2.ll | 195 +++++- .../PhaseOrdering}/X86/haddsub-shuf.ll | 285 ++++++++- .../PhaseOrdering}/X86/haddsub-undef.ll | 407 +++++++++++- .../PhaseOrdering}/X86/haddsub.ll | 590 +++++++++++++++++- .../PhaseOrdering}/X86/phaddsub-undef.ll | 78 ++- 6 files changed, 1502 insertions(+), 171 deletions(-) rename llvm/test/{CodeGen => Transforms/PhaseOrdering}/X86/haddsub-2.ll (81%) rename llvm/test/{CodeGen => Transforms/PhaseOrdering}/X86/haddsub-shuf.ll (73%) rename llvm/test/{CodeGen => Transforms/PhaseOrdering}/X86/haddsub-undef.ll (57%) rename llvm/test/{CodeGen => Transforms/PhaseOrdering}/X86/haddsub.ll (64%) rename llvm/test/{CodeGen => Transforms/PhaseOrdering}/X86/phaddsub-undef.ll (53%) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index f8f29b9f2cdc7..677ecf8801e2d 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -8569,122 +8569,6 @@ static SDValue getHopForBuildVector(const BuildVectorSDNode *BV, return DAG.getNode(HOpcode, DL, VT, V0, V1); } -/// Lower BUILD_VECTOR to a horizontal add/sub operation if possible. -static SDValue LowerToHorizontalOp(const BuildVectorSDNode *BV, const SDLoc &DL, - const X86Subtarget &Subtarget, - SelectionDAG &DAG) { - // We need at least 2 non-undef elements to make this worthwhile by default. - unsigned NumNonUndefs = - count_if(BV->op_values(), [](SDValue V) { return !V.isUndef(); }); - if (NumNonUndefs < 2) - return SDValue(); - - // There are 4 sets of horizontal math operations distinguished by type: - // int/FP at 128-bit/256-bit. Each type was introduced with a different - // subtarget feature. Try to match those "native" patterns first. - MVT VT = BV->getSimpleValueType(0); - if (((VT == MVT::v4f32 || VT == MVT::v2f64) && Subtarget.hasSSE3()) || - ((VT == MVT::v8i16 || VT == MVT::v4i32) && Subtarget.hasSSSE3()) || - ((VT == MVT::v8f32 || VT == MVT::v4f64) && Subtarget.hasAVX()) || - ((VT == MVT::v16i16 || VT == MVT::v8i32) && Subtarget.hasAVX2())) { - unsigned HOpcode; - SDValue V0, V1; - if (isHopBuildVector(BV, DAG, HOpcode, V0, V1)) - return getHopForBuildVector(BV, DL, DAG, HOpcode, V0, V1); - } - - // Try harder to match 256-bit ops by using extract/concat. - if (!Subtarget.hasAVX() || !VT.is256BitVector()) - return SDValue(); - - // Count the number of UNDEF operands in the build_vector in input. - unsigned NumElts = VT.getVectorNumElements(); - unsigned Half = NumElts / 2; - unsigned NumUndefsLO = 0; - unsigned NumUndefsHI = 0; - for (unsigned i = 0, e = Half; i != e; ++i) - if (BV->getOperand(i)->isUndef()) - NumUndefsLO++; - - for (unsigned i = Half, e = NumElts; i != e; ++i) - if (BV->getOperand(i)->isUndef()) - NumUndefsHI++; - - SDValue InVec0, InVec1; - if (VT == MVT::v8i32 || VT == MVT::v16i16) { - SDValue InVec2, InVec3; - unsigned X86Opcode; - bool CanFold = true; - - if (isHorizontalBinOpPart(BV, ISD::ADD, DL, DAG, 0, Half, InVec0, InVec1) && - isHorizontalBinOpPart(BV, ISD::ADD, DL, DAG, Half, NumElts, InVec2, - InVec3) && - ((InVec0.isUndef() || InVec2.isUndef()) || InVec0 == InVec2) && - ((InVec1.isUndef() || InVec3.isUndef()) || InVec1 == InVec3)) - X86Opcode = X86ISD::HADD; - else if (isHorizontalBinOpPart(BV, ISD::SUB, DL, DAG, 0, Half, InVec0, - InVec1) && - isHorizontalBinOpPart(BV, ISD::SUB, DL, DAG, Half, NumElts, InVec2, - InVec3) && - ((InVec0.isUndef() || InVec2.isUndef()) || InVec0 == InVec2) && - ((InVec1.isUndef() || InVec3.isUndef()) || InVec1 == InVec3)) - X86Opcode = X86ISD::HSUB; - else - CanFold = false; - - if (CanFold) { - // Do not try to expand this build_vector into a pair of horizontal - // add/sub if we can emit a pair of scalar add/sub. - if (NumUndefsLO + 1 == Half || NumUndefsHI + 1 == Half) - return SDValue(); - - // Convert this build_vector into a pair of horizontal binops followed by - // a concat vector. We must adjust the outputs from the partial horizontal - // matching calls above to account for undefined vector halves. - SDValue V0 = InVec0.isUndef() ? InVec2 : InVec0; - SDValue V1 = InVec1.isUndef() ? InVec3 : InVec1; - assert((!V0.isUndef() || !V1.isUndef()) && "Horizontal-op of undefs?"); - bool isUndefLO = NumUndefsLO == Half; - bool isUndefHI = NumUndefsHI == Half; - return ExpandHorizontalBinOp(V0, V1, DL, DAG, X86Opcode, false, isUndefLO, - isUndefHI); - } - } - - if (VT == MVT::v8f32 || VT == MVT::v4f64 || VT == MVT::v8i32 || - VT == MVT::v16i16) { - unsigned X86Opcode; - if (isHorizontalBinOpPart(BV, ISD::ADD, DL, DAG, 0, NumElts, InVec0, - InVec1)) - X86Opcode = X86ISD::HADD; - else if (isHorizontalBinOpPart(BV, ISD::SUB, DL, DAG, 0, NumElts, InVec0, - InVec1)) - X86Opcode = X86ISD::HSUB; - else if (isHorizontalBinOpPart(BV, ISD::FADD, DL, DAG, 0, NumElts, InVec0, - InVec1)) - X86Opcode = X86ISD::FHADD; - else if (isHorizontalBinOpPart(BV, ISD::FSUB, DL, DAG, 0, NumElts, InVec0, - InVec1)) - X86Opcode = X86ISD::FHSUB; - else - return SDValue(); - - // Don't try to expand this build_vector into a pair of horizontal add/sub - // if we can simply emit a pair of scalar add/sub. - if (NumUndefsLO + 1 == Half || NumUndefsHI + 1 == Half) - return SDValue(); - - // Convert this build_vector into two horizontal add/sub followed by - // a concat vector. - bool isUndefLO = NumUndefsLO == Half; - bool isUndefHI = NumUndefsHI == Half; - return ExpandHorizontalBinOp(InVec0, InVec1, DL, DAG, X86Opcode, true, - isUndefLO, isUndefHI); - } - - return SDValue(); -} - static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG); @@ -9270,8 +9154,6 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { if (SDValue AddSub = lowerToAddSubOrFMAddSub(BV, dl, Subtarget, DAG)) return AddSub; - if (SDValue HorizontalOp = LowerToHorizontalOp(BV, dl, Subtarget, DAG)) - return HorizontalOp; if (SDValue Broadcast = lowerBuildVectorAsBroadcast(BV, dl, Subtarget, DAG)) return Broadcast; if (SDValue BitOp = lowerBuildVectorToBitOp(BV, dl, Subtarget, DAG)) diff --git a/llvm/test/CodeGen/X86/haddsub-2.ll b/llvm/test/Transforms/PhaseOrdering/X86/haddsub-2.ll similarity index 81% rename from llvm/test/CodeGen/X86/haddsub-2.ll rename to llvm/test/Transforms/PhaseOrdering/X86/haddsub-2.ll index bca446fa8fb56..4eb5bdba9edb6 100644 --- a/llvm/test/CodeGen/X86/haddsub-2.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/haddsub-2.ll @@ -1,38 +1,39 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2,+sse3 | FileCheck %s --check-prefix=SSE --check-prefix=SSE3 -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2,+sse3,+ssse3 | FileCheck %s --check-prefix=SSE --check-prefix=SSSE3 -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1 -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2 +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -passes="default" -S %s | FileCheck %s define <4 x float> @hadd_ps_test1(<4 x float> %A, <4 x float> %B) { -; SSE-LABEL: hadd_ps_test1: -; SSE: # %bb.0: -; SSE-NEXT: haddps %xmm1, %xmm0 -; SSE-NEXT: retq +; CHECK-LABEL: define <4 x float> @hadd_ps_test1( +; CHECK-SAME: <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <4 x float> [[TMP3]] ; -; AVX-LABEL: hadd_ps_test1: -; AVX: # %bb.0: -; AVX-NEXT: vhaddps %xmm1, %xmm0, %xmm0 -; AVX-NEXT: retq + %vecext = extractelement <4 x float> %A, i32 0 %vecext1 = extractelement <4 x float> %A, i32 1 %add = fadd float %vecext, %vecext1 %vecinit = insertelement <4 x float> undef, float %add, i32 0 + %vecext2 = extractelement <4 x float> %A, i32 2 %vecext3 = extractelement <4 x float> %A, i32 3 %add4 = fadd float %vecext2, %vecext3 %vecinit5 = insertelement <4 x float> %vecinit, float %add4, i32 1 + %vecext6 = extractelement <4 x float> %B, i32 0 %vecext7 = extractelement <4 x float> %B, i32 1 %add8 = fadd float %vecext6, %vecext7 %vecinit9 = insertelement <4 x float> %vecinit5, float %add8, i32 2 + %vecext10 = extractelement <4 x float> %B, i32 2 %vecext11 = extractelement <4 x float> %B, i32 3 %add12 = fadd float %vecext10, %vecext11 %vecinit13 = insertelement <4 x float> %vecinit9, float %add12, i32 3 + ret <4 x float> %vecinit13 } + define <4 x float> @hadd_ps_test2(<4 x float> %A, <4 x float> %B) { ; SSE-LABEL: hadd_ps_test2: ; SSE: # %bb.0: @@ -43,6 +44,13 @@ define <4 x float> @hadd_ps_test2(<4 x float> %A, <4 x float> %B) { ; AVX: # %bb.0: ; AVX-NEXT: vhaddps %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <4 x float> @hadd_ps_test2( +; CHECK-SAME: <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <4 x float> [[TMP3]] +; %vecext = extractelement <4 x float> %A, i32 2 %vecext1 = extractelement <4 x float> %A, i32 3 %add = fadd float %vecext, %vecext1 @@ -72,6 +80,13 @@ define <4 x float> @hsub_ps_test1(<4 x float> %A, <4 x float> %B) { ; AVX: # %bb.0: ; AVX-NEXT: vhsubps %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <4 x float> @hsub_ps_test1( +; CHECK-SAME: <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = fsub <4 x float> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <4 x float> [[TMP3]] +; %vecext = extractelement <4 x float> %A, i32 0 %vecext1 = extractelement <4 x float> %A, i32 1 %sub = fsub float %vecext, %vecext1 @@ -101,6 +116,13 @@ define <4 x float> @hsub_ps_test2(<4 x float> %A, <4 x float> %B) { ; AVX: # %bb.0: ; AVX-NEXT: vhsubps %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <4 x float> @hsub_ps_test2( +; CHECK-SAME: <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = fsub <4 x float> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <4 x float> [[TMP3]] +; %vecext = extractelement <4 x float> %A, i32 2 %vecext1 = extractelement <4 x float> %A, i32 3 %sub = fsub float %vecext, %vecext1 @@ -159,6 +181,13 @@ define <4 x i32> @phadd_d_test1(<4 x i32> %A, <4 x i32> %B) { ; AVX: # %bb.0: ; AVX-NEXT: vphaddd %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <4 x i32> @phadd_d_test1( +; CHECK-SAME: <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <4 x i32> [[TMP3]] +; %vecext = extractelement <4 x i32> %A, i32 0 %vecext1 = extractelement <4 x i32> %A, i32 1 %add = add i32 %vecext, %vecext1 @@ -217,6 +246,13 @@ define <4 x i32> @phadd_d_test2(<4 x i32> %A, <4 x i32> %B) { ; AVX: # %bb.0: ; AVX-NEXT: vphaddd %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <4 x i32> @phadd_d_test2( +; CHECK-SAME: <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <4 x i32> [[TMP3]] +; %vecext = extractelement <4 x i32> %A, i32 2 %vecext1 = extractelement <4 x i32> %A, i32 3 %add = add i32 %vecext, %vecext1 @@ -275,6 +311,13 @@ define <4 x i32> @phsub_d_test1(<4 x i32> %A, <4 x i32> %B) { ; AVX: # %bb.0: ; AVX-NEXT: vphsubd %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <4 x i32> @phsub_d_test1( +; CHECK-SAME: <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = sub <4 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <4 x i32> [[TMP3]] +; %vecext = extractelement <4 x i32> %A, i32 0 %vecext1 = extractelement <4 x i32> %A, i32 1 %sub = sub i32 %vecext, %vecext1 @@ -333,6 +376,13 @@ define <4 x i32> @phsub_d_test2(<4 x i32> %A, <4 x i32> %B) { ; AVX: # %bb.0: ; AVX-NEXT: vphsubd %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <4 x i32> @phsub_d_test2( +; CHECK-SAME: <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = sub <4 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <4 x i32> [[TMP3]] +; %vecext = extractelement <4 x i32> %A, i32 2 %vecext1 = extractelement <4 x i32> %A, i32 3 %sub = sub i32 %vecext, %vecext1 @@ -362,6 +412,13 @@ define <2 x double> @hadd_pd_test1(<2 x double> %A, <2 x double> %B) { ; AVX: # %bb.0: ; AVX-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <2 x double> @hadd_pd_test1( +; CHECK-SAME: <2 x double> [[A:%.*]], <2 x double> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[B]], <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[B]], <2 x i32> +; CHECK-NEXT: [[VECINIT2:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <2 x double> [[VECINIT2]] +; %vecext = extractelement <2 x double> %A, i32 0 %vecext1 = extractelement <2 x double> %A, i32 1 %add = fadd double %vecext, %vecext1 @@ -383,6 +440,13 @@ define <2 x double> @hadd_pd_test2(<2 x double> %A, <2 x double> %B) { ; AVX: # %bb.0: ; AVX-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <2 x double> @hadd_pd_test2( +; CHECK-SAME: <2 x double> [[A:%.*]], <2 x double> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[B]], <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[B]], <2 x i32> +; CHECK-NEXT: [[VECINIT2:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <2 x double> [[VECINIT2]] +; %vecext = extractelement <2 x double> %A, i32 1 %vecext1 = extractelement <2 x double> %A, i32 0 %add = fadd double %vecext, %vecext1 @@ -404,6 +468,13 @@ define <2 x double> @hsub_pd_test1(<2 x double> %A, <2 x double> %B) { ; AVX: # %bb.0: ; AVX-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <2 x double> @hsub_pd_test1( +; CHECK-SAME: <2 x double> [[A:%.*]], <2 x double> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[B]], <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[B]], <2 x i32> +; CHECK-NEXT: [[VECINIT2:%.*]] = fsub <2 x double> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <2 x double> [[VECINIT2]] +; %vecext = extractelement <2 x double> %A, i32 0 %vecext1 = extractelement <2 x double> %A, i32 1 %sub = fsub double %vecext, %vecext1 @@ -425,6 +496,13 @@ define <2 x double> @hsub_pd_test2(<2 x double> %A, <2 x double> %B) { ; AVX: # %bb.0: ; AVX-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <2 x double> @hsub_pd_test2( +; CHECK-SAME: <2 x double> [[A:%.*]], <2 x double> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[B]], <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[B]], <2 x i32> +; CHECK-NEXT: [[VECINIT2:%.*]] = fsub <2 x double> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <2 x double> [[VECINIT2]] +; %vecext = extractelement <2 x double> %B, i32 0 %vecext1 = extractelement <2 x double> %B, i32 1 %sub = fsub double %vecext, %vecext1 @@ -456,6 +534,13 @@ define <4 x double> @avx_vhadd_pd_test(<4 x double> %A, <4 x double> %B) { ; AVX2-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3] ; AVX2-NEXT: retq +; CHECK-LABEL: define <4 x double> @avx_vhadd_pd_test( +; CHECK-SAME: <4 x double> [[A:%.*]], <4 x double> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = fadd <4 x double> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <4 x double> [[TMP3]] +; %vecext = extractelement <4 x double> %A, i32 0 %vecext1 = extractelement <4 x double> %A, i32 1 %add = fadd double %vecext, %vecext1 @@ -495,6 +580,13 @@ define <4 x double> @avx_vhsub_pd_test(<4 x double> %A, <4 x double> %B) { ; AVX2-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3] ; AVX2-NEXT: retq +; CHECK-LABEL: define <4 x double> @avx_vhsub_pd_test( +; CHECK-SAME: <4 x double> [[A:%.*]], <4 x double> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = fsub <4 x double> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <4 x double> [[TMP3]] +; %vecext = extractelement <4 x double> %A, i32 0 %vecext1 = extractelement <4 x double> %A, i32 1 %sub = fsub double %vecext, %vecext1 @@ -590,6 +682,13 @@ define <8 x i32> @avx2_vphadd_d_test(<8 x i32> %A, <8 x i32> %B) { ; AVX2-NEXT: vphaddd %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] ; AVX2-NEXT: retq +; CHECK-LABEL: define <8 x i32> @avx2_vphadd_d_test( +; CHECK-SAME: <8 x i32> [[A:%.*]], <8 x i32> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> [[B]], <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> [[B]], <8 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = add <8 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <8 x i32> [[TMP3]] +; %vecext = extractelement <8 x i32> %A, i32 0 %vecext1 = extractelement <8 x i32> %A, i32 1 %add = add i32 %vecext, %vecext1 @@ -745,6 +844,13 @@ define <16 x i16> @avx2_vphadd_w_test(<16 x i16> %a, <16 x i16> %b) nounwind { ; AVX2-NEXT: vphaddw %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] ; AVX2-NEXT: retq +; CHECK-LABEL: define <16 x i16> @avx2_vphadd_w_test( +; CHECK-SAME: <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = add <16 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <16 x i16> [[TMP3]] +; %vecext = extractelement <16 x i16> %a, i32 0 %vecext1 = extractelement <16 x i16> %a, i32 1 %add = add i16 %vecext, %vecext1 @@ -863,6 +969,13 @@ define <4 x i32> @not_a_hsub_1(<4 x i32> %A, <4 x i32> %B) { ; AVX-NEXT: vpinsrd $2, %edx, %xmm0, %xmm0 ; AVX-NEXT: vpinsrd $3, %esi, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <4 x i32> @not_a_hsub_1( +; CHECK-SAME: <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = sub <4 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <4 x i32> [[TMP3]] +; %vecext = extractelement <4 x i32> %A, i32 0 %vecext1 = extractelement <4 x i32> %A, i32 1 %sub = sub i32 %vecext, %vecext1 @@ -920,6 +1033,13 @@ define <4 x float> @not_a_hsub_2(<4 x float> %A, <4 x float> %B) { ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3] ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm2[0] ; AVX-NEXT: retq +; CHECK-LABEL: define <4 x float> @not_a_hsub_2( +; CHECK-SAME: <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = fsub <4 x float> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <4 x float> [[TMP3]] +; %vecext = extractelement <4 x float> %A, i32 2 %vecext1 = extractelement <4 x float> %A, i32 3 %sub = fsub float %vecext, %vecext1 @@ -960,6 +1080,13 @@ define <2 x double> @not_a_hsub_3(<2 x double> %A, <2 x double> %B) { ; AVX-NEXT: vsubsd %xmm0, %xmm2, %xmm0 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; AVX-NEXT: retq +; CHECK-LABEL: define <2 x double> @not_a_hsub_3( +; CHECK-SAME: <2 x double> [[A:%.*]], <2 x double> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[B]], <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[B]], <2 x i32> +; CHECK-NEXT: [[VECINIT2:%.*]] = fsub <2 x double> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <2 x double> [[VECINIT2]] +; %vecext = extractelement <2 x double> %B, i32 0 %vecext1 = extractelement <2 x double> %B, i32 1 %sub = fsub double %vecext, %vecext1 @@ -985,6 +1112,13 @@ define <8 x float> @avx_vhadd_ps(<8 x float> %a, <8 x float> %b) { ; AVX: # %bb.0: ; AVX-NEXT: vhaddps %ymm1, %ymm0, %ymm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <8 x float> @avx_vhadd_ps( +; CHECK-SAME: <8 x float> [[A:%.*]], <8 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <8 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = fadd <8 x float> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <8 x float> [[TMP3]] +; %vecext = extractelement <8 x float> %a, i32 0 %vecext1 = extractelement <8 x float> %a, i32 1 %add = fadd float %vecext, %vecext1 @@ -1031,6 +1165,13 @@ define <8 x float> @avx_vhsub_ps(<8 x float> %a, <8 x float> %b) { ; AVX: # %bb.0: ; AVX-NEXT: vhsubps %ymm1, %ymm0, %ymm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <8 x float> @avx_vhsub_ps( +; CHECK-SAME: <8 x float> [[A:%.*]], <8 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <8 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = fsub <8 x float> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <8 x float> [[TMP3]] +; %vecext = extractelement <8 x float> %a, i32 0 %vecext1 = extractelement <8 x float> %a, i32 1 %sub = fsub float %vecext, %vecext1 @@ -1077,6 +1218,13 @@ define <4 x double> @avx_hadd_pd(<4 x double> %a, <4 x double> %b) { ; AVX: # %bb.0: ; AVX-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <4 x double> @avx_hadd_pd( +; CHECK-SAME: <4 x double> [[A:%.*]], <4 x double> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = fadd <4 x double> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <4 x double> [[TMP3]] +; %vecext = extractelement <4 x double> %a, i32 0 %vecext1 = extractelement <4 x double> %a, i32 1 %add = fadd double %vecext, %vecext1 @@ -1107,6 +1255,13 @@ define <4 x double> @avx_hsub_pd(<4 x double> %a, <4 x double> %b) { ; AVX: # %bb.0: ; AVX-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <4 x double> @avx_hsub_pd( +; CHECK-SAME: <4 x double> [[A:%.*]], <4 x double> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = fsub <4 x double> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <4 x double> [[TMP3]] +; %vecext = extractelement <4 x double> %a, i32 0 %vecext1 = extractelement <4 x double> %a, i32 1 %sub = fsub double %vecext, %vecext1 @@ -1202,6 +1357,13 @@ define <8 x i32> @avx2_hadd_d(<8 x i32> %a, <8 x i32> %b) { ; AVX2: # %bb.0: ; AVX2-NEXT: vphaddd %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq +; CHECK-LABEL: define <8 x i32> @avx2_hadd_d( +; CHECK-SAME: <8 x i32> [[A:%.*]], <8 x i32> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> [[B]], <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> [[B]], <8 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = add <8 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <8 x i32> [[TMP3]] +; %vecext = extractelement <8 x i32> %a, i32 0 %vecext1 = extractelement <8 x i32> %a, i32 1 %add = add i32 %vecext, %vecext1 @@ -1355,6 +1517,13 @@ define <16 x i16> @avx2_hadd_w(<16 x i16> %a, <16 x i16> %b) nounwind { ; AVX2: # %bb.0: ; AVX2-NEXT: vphaddw %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq +; CHECK-LABEL: define <16 x i16> @avx2_hadd_w( +; CHECK-SAME: <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = add <16 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <16 x i16> [[TMP3]] +; %vecext = extractelement <16 x i16> %a, i32 0 %vecext1 = extractelement <16 x i16> %a, i32 1 %add = add i16 %vecext, %vecext1 diff --git a/llvm/test/CodeGen/X86/haddsub-shuf.ll b/llvm/test/Transforms/PhaseOrdering/X86/haddsub-shuf.ll similarity index 73% rename from llvm/test/CodeGen/X86/haddsub-shuf.ll rename to llvm/test/Transforms/PhaseOrdering/X86/haddsub-shuf.ll index 364ad953a11d4..f425550c1c6df 100644 --- a/llvm/test/CodeGen/X86/haddsub-shuf.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/haddsub-shuf.ll @@ -1,15 +1,5 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse3 | FileCheck %s --check-prefixes=SSE,SSE_SLOW,SSE3 -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse3,fast-hops | FileCheck %s --check-prefixes=SSE,SSE_FAST,SSE3 -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE,SSE_SLOW,SSSE3,SSSE3_SLOW -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ssse3,fast-hops | FileCheck %s --check-prefixes=SSE,SSE_FAST,SSSE3,SSSE3_FAST -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1,AVX1_SLOW -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx,fast-hops | FileCheck %s --check-prefixes=AVX,AVX1,AVX1_FAST -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2,AVX2_SLOW -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2,fast-hops | FileCheck %s --check-prefixes=AVX,AVX2,AVX2_FAST - -; The next 8 tests check for matching the horizontal op and eliminating the shuffle. -; PR34111 - https://bugs.llvm.org/show_bug.cgi?id=34111 +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -passes="default" -S %s | FileCheck %s define <4 x float> @hadd_v4f32(<4 x float> %a) { ; SSE-LABEL: hadd_v4f32: @@ -21,6 +11,13 @@ define <4 x float> @hadd_v4f32(<4 x float> %a) { ; AVX: # %bb.0: ; AVX-NEXT: vhaddps %xmm0, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <4 x float> @hadd_v4f32( +; CHECK-SAME: <4 x float> [[A:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[SHUF:%.*]] = fadd <4 x float> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <4 x float> [[SHUF]] +; %a02 = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> %a13 = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> %hop = fadd <2 x float> %a02, %a13 @@ -65,6 +62,13 @@ define <8 x float> @hadd_v8f32a(<8 x float> %a) { ; AVX2-NEXT: vhaddps %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,2,1] ; AVX2-NEXT: retq +; CHECK-LABEL: define <8 x float> @hadd_v8f32a( +; CHECK-SAME: <8 x float> [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <8 x i32> +; CHECK-NEXT: [[SHUF:%.*]] = fadd <8 x float> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <8 x float> [[SHUF]] +; %a0 = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> %a1 = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> %hop = fadd <4 x float> %a0, %a1 @@ -83,6 +87,13 @@ define <8 x float> @hadd_v8f32b(<8 x float> %a) { ; AVX: # %bb.0: ; AVX-NEXT: vhaddps %ymm0, %ymm0, %ymm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <8 x float> @hadd_v8f32b( +; CHECK-SAME: <8 x float> [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <8 x i32> +; CHECK-NEXT: [[SHUF:%.*]] = fadd <8 x float> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <8 x float> [[SHUF]] +; %a0 = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> %a1 = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> %hop = fadd <8 x float> %a0, %a1 @@ -100,6 +111,13 @@ define <4 x float> @hsub_v4f32(<4 x float> %a) { ; AVX: # %bb.0: ; AVX-NEXT: vhsubps %xmm0, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <4 x float> @hsub_v4f32( +; CHECK-SAME: <4 x float> [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[SHUF:%.*]] = fsub <4 x float> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <4 x float> [[SHUF]] +; %a02 = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> %a13 = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> %hop = fsub <2 x float> %a02, %a13 @@ -144,6 +162,13 @@ define <8 x float> @hsub_v8f32a(<8 x float> %a) { ; AVX2-NEXT: vhsubps %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,2,1] ; AVX2-NEXT: retq +; CHECK-LABEL: define <8 x float> @hsub_v8f32a( +; CHECK-SAME: <8 x float> [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <8 x i32> +; CHECK-NEXT: [[SHUF:%.*]] = fsub <8 x float> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <8 x float> [[SHUF]] +; %a0 = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> %a1 = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> %hop = fsub <4 x float> %a0, %a1 @@ -162,6 +187,13 @@ define <8 x float> @hsub_v8f32b(<8 x float> %a) { ; AVX: # %bb.0: ; AVX-NEXT: vhsubps %ymm0, %ymm0, %ymm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <8 x float> @hsub_v8f32b( +; CHECK-SAME: <8 x float> [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <8 x i32> +; CHECK-NEXT: [[SHUF:%.*]] = fsub <8 x float> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <8 x float> [[SHUF]] +; %a0 = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> %a1 = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> %hop = fsub <8 x float> %a0, %a1 @@ -206,6 +238,13 @@ define <2 x double> @hadd_v2f64(<2 x double> %a) { ; AVX2_FAST: # %bb.0: ; AVX2_FAST-NEXT: vhaddpd %xmm0, %xmm0, %xmm0 ; AVX2_FAST-NEXT: retq +; CHECK-LABEL: define <2 x double> @hadd_v2f64( +; CHECK-SAME: <2 x double> [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[A]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[A]], <2 x double> poison, <2 x i32> +; CHECK-NEXT: [[SHUF:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <2 x double> [[SHUF]] +; %a0 = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> %a1 = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> %hop = fadd <2 x double> %a0, %a1 @@ -250,6 +289,13 @@ define <2 x double> @hadd_v2f64_scalar_splat(<2 x double> %a) { ; AVX2_FAST: # %bb.0: ; AVX2_FAST-NEXT: vhaddpd %xmm0, %xmm0, %xmm0 ; AVX2_FAST-NEXT: retq +; CHECK-LABEL: define <2 x double> @hadd_v2f64_scalar_splat( +; CHECK-SAME: <2 x double> [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[A]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[A]], <2 x double> poison, <2 x i32> +; CHECK-NEXT: [[SHUF:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <2 x double> [[SHUF]] +; %a0 = extractelement <2 x double> %a, i32 0 %a1 = extractelement <2 x double> %a, i32 1 %hop = fadd double %a0, %a1 @@ -281,6 +327,13 @@ define <4 x double> @hadd_v4f64_scalar_splat(<4 x double> %a) { ; AVX: # %bb.0: ; AVX-NEXT: vhaddpd %ymm0, %ymm0, %ymm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <4 x double> @hadd_v4f64_scalar_splat( +; CHECK-SAME: <4 x double> [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A]], <4 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> poison, <4 x i32> +; CHECK-NEXT: [[SHUF:%.*]] = fadd <4 x double> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <4 x double> [[SHUF]] +; %a0 = extractelement <4 x double> %a, i32 0 %a1 = extractelement <4 x double> %a, i32 1 %hop0 = fadd double %a0, %a1 @@ -335,6 +388,13 @@ define <4 x double> @hadd_v4f64_scalar_broadcast(<4 x double> %a) { ; AVX2_FAST-NEXT: vhaddpd %xmm0, %xmm0, %xmm0 ; AVX2_FAST-NEXT: vbroadcastsd %xmm0, %ymm0 ; AVX2_FAST-NEXT: retq +; CHECK-LABEL: define <4 x double> @hadd_v4f64_scalar_broadcast( +; CHECK-SAME: <4 x double> [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A]], <4 x double> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> poison, <4 x i32> +; CHECK-NEXT: [[SHUF:%.*]] = fadd <4 x double> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <4 x double> [[SHUF]] +; %a0 = extractelement <4 x double> %a, i32 0 %a1 = extractelement <4 x double> %a, i32 1 %hop0 = fadd double %a0, %a1 @@ -370,6 +430,13 @@ define <4 x double> @hadd_v4f64(<4 x double> %a) { ; AVX: # %bb.0: ; AVX-NEXT: vhaddpd %ymm0, %ymm0, %ymm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <4 x double> @hadd_v4f64( +; CHECK-SAME: <4 x double> [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A]], <4 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> poison, <4 x i32> +; CHECK-NEXT: [[SHUF:%.*]] = fadd <4 x double> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <4 x double> [[SHUF]] +; %a0 = shufflevector <4 x double> %a, <4 x double> undef, <4 x i32> %a1 = shufflevector <4 x double> %a, <4 x double> undef, <4 x i32> %hop = fadd <4 x double> %a0, %a1 @@ -414,6 +481,12 @@ define <2 x double> @hsub_v2f64(<2 x double> %a) { ; AVX2_FAST: # %bb.0: ; AVX2_FAST-NEXT: vhsubpd %xmm0, %xmm0, %xmm0 ; AVX2_FAST-NEXT: retq +; CHECK-LABEL: define <2 x double> @hsub_v2f64( +; CHECK-SAME: <2 x double> [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[A]], <2 x double> poison, <2 x i32> +; CHECK-NEXT: [[SHUF:%.*]] = fsub <2 x double> [[TMP1]], [[A]] +; CHECK-NEXT: ret <2 x double> [[SHUF]] +; %a0 = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> %a1 = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> %hop = fsub <2 x double> %a0, %a1 @@ -444,6 +517,13 @@ define <4 x double> @hsub_v4f64(<4 x double> %a) { ; AVX: # %bb.0: ; AVX-NEXT: vhsubpd %ymm0, %ymm0, %ymm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <4 x double> @hsub_v4f64( +; CHECK-SAME: <4 x double> [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A]], <4 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> poison, <4 x i32> +; CHECK-NEXT: [[SHUF:%.*]] = fsub <4 x double> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <4 x double> [[SHUF]] +; %a0 = shufflevector <4 x double> %a, <4 x double> undef, <4 x i32> %a1 = shufflevector <4 x double> %a, <4 x double> undef, <4 x i32> %hop = fsub <4 x double> %a0, %a1 @@ -468,6 +548,13 @@ define <4 x i32> @hadd_v4i32(<4 x i32> %a) { ; AVX: # %bb.0: ; AVX-NEXT: vphaddd %xmm0, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <4 x i32> @hadd_v4i32( +; CHECK-SAME: <4 x i32> [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[SHUF:%.*]] = add <4 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <4 x i32> [[SHUF]] +; %a02 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> %a13 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> %hop = add <4 x i32> %a02, %a13 @@ -524,6 +611,13 @@ define <8 x i32> @hadd_v8i32a(<8 x i32> %a) { ; AVX2-NEXT: vphaddd %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,2,1] ; AVX2-NEXT: retq +; CHECK-LABEL: define <8 x i32> @hadd_v8i32a( +; CHECK-SAME: <8 x i32> [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <8 x i32> +; CHECK-NEXT: [[SHUF:%.*]] = add <8 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <8 x i32> [[SHUF]] +; %a0 = shufflevector <8 x i32> %a, <8 x i32> undef, <4 x i32> %a1 = shufflevector <8 x i32> %a, <8 x i32> undef, <4 x i32> %hop = add <4 x i32> %a0, %a1 @@ -560,6 +654,13 @@ define <8 x i32> @hadd_v8i32b(<8 x i32> %a) { ; AVX2: # %bb.0: ; AVX2-NEXT: vphaddd %ymm0, %ymm0, %ymm0 ; AVX2-NEXT: retq +; CHECK-LABEL: define <8 x i32> @hadd_v8i32b( +; CHECK-SAME: <8 x i32> [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <8 x i32> +; CHECK-NEXT: [[SHUF:%.*]] = add <8 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <8 x i32> [[SHUF]] +; %a0 = shufflevector <8 x i32> %a, <8 x i32> undef, <8 x i32> %a1 = shufflevector <8 x i32> %a, <8 x i32> undef, <8 x i32> %hop = add <8 x i32> %a0, %a1 @@ -584,6 +685,13 @@ define <4 x i32> @hsub_v4i32(<4 x i32> %a) { ; AVX: # %bb.0: ; AVX-NEXT: vphsubd %xmm0, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <4 x i32> @hsub_v4i32( +; CHECK-SAME: <4 x i32> [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[SHUF:%.*]] = sub <4 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <4 x i32> [[SHUF]] +; %a02 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> %a13 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> %hop = sub <4 x i32> %a02, %a13 @@ -640,6 +748,13 @@ define <8 x i32> @hsub_v8i32a(<8 x i32> %a) { ; AVX2-NEXT: vphsubd %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,2,1] ; AVX2-NEXT: retq +; CHECK-LABEL: define <8 x i32> @hsub_v8i32a( +; CHECK-SAME: <8 x i32> [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <8 x i32> +; CHECK-NEXT: [[SHUF:%.*]] = sub <8 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <8 x i32> [[SHUF]] +; %a0 = shufflevector <8 x i32> %a, <8 x i32> undef, <4 x i32> %a1 = shufflevector <8 x i32> %a, <8 x i32> undef, <4 x i32> %hop = sub <4 x i32> %a0, %a1 @@ -676,6 +791,13 @@ define <8 x i32> @hsub_v8i32b(<8 x i32> %a) { ; AVX2: # %bb.0: ; AVX2-NEXT: vphsubd %ymm0, %ymm0, %ymm0 ; AVX2-NEXT: retq +; CHECK-LABEL: define <8 x i32> @hsub_v8i32b( +; CHECK-SAME: <8 x i32> [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <8 x i32> +; CHECK-NEXT: [[SHUF:%.*]] = sub <8 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <8 x i32> [[SHUF]] +; %a0 = shufflevector <8 x i32> %a, <8 x i32> undef, <8 x i32> %a1 = shufflevector <8 x i32> %a, <8 x i32> undef, <8 x i32> %hop = sub <8 x i32> %a0, %a1 @@ -705,6 +827,13 @@ define <8 x i16> @hadd_v8i16(<8 x i16> %a) { ; AVX: # %bb.0: ; AVX-NEXT: vphaddw %xmm0, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <8 x i16> @hadd_v8i16( +; CHECK-SAME: <8 x i16> [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <8 x i32> +; CHECK-NEXT: [[SHUF:%.*]] = add <8 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <8 x i16> [[SHUF]] +; %a0246 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> %a1357 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> %hop = add <8 x i16> %a0246, %a1357 @@ -768,6 +897,13 @@ define <16 x i16> @hadd_v16i16a(<16 x i16> %a) { ; AVX2-NEXT: vphaddw %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,2,1] ; AVX2-NEXT: retq +; CHECK-LABEL: define <16 x i16> @hadd_v16i16a( +; CHECK-SAME: <16 x i16> [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> poison, <16 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> poison, <16 x i32> +; CHECK-NEXT: [[SHUF:%.*]] = add <16 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <16 x i16> [[SHUF]] +; %a0 = shufflevector <16 x i16> %a, <16 x i16> undef, <8 x i32> %a1 = shufflevector <16 x i16> %a, <16 x i16> undef, <8 x i32> %hop = add <8 x i16> %a0, %a1 @@ -820,6 +956,13 @@ define <16 x i16> @hadd_v16i16b(<16 x i16> %a) { ; AVX2: # %bb.0: ; AVX2-NEXT: vphaddw %ymm0, %ymm0, %ymm0 ; AVX2-NEXT: retq +; CHECK-LABEL: define <16 x i16> @hadd_v16i16b( +; CHECK-SAME: <16 x i16> [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> poison, <16 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> poison, <16 x i32> +; CHECK-NEXT: [[SHUF:%.*]] = add <16 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <16 x i16> [[SHUF]] +; %a0 = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> %a1 = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> %hop = add <16 x i16> %a0, %a1 @@ -845,6 +988,13 @@ define <8 x i16> @hsub_v8i16(<8 x i16> %a) { ; AVX: # %bb.0: ; AVX-NEXT: vphsubw %xmm0, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <8 x i16> @hsub_v8i16( +; CHECK-SAME: <8 x i16> [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <8 x i32> +; CHECK-NEXT: [[SHUF:%.*]] = sub <8 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <8 x i16> [[SHUF]] +; %a0246 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> %a1357 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> %hop = sub <8 x i16> %a0246, %a1357 @@ -908,6 +1058,13 @@ define <16 x i16> @hsub_v16i16a(<16 x i16> %a) { ; AVX2-NEXT: vphsubw %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,2,1] ; AVX2-NEXT: retq +; CHECK-LABEL: define <16 x i16> @hsub_v16i16a( +; CHECK-SAME: <16 x i16> [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> poison, <16 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> poison, <16 x i32> +; CHECK-NEXT: [[SHUF:%.*]] = sub <16 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <16 x i16> [[SHUF]] +; %a0 = shufflevector <16 x i16> %a, <16 x i16> undef, <8 x i32> %a1 = shufflevector <16 x i16> %a, <16 x i16> undef, <8 x i32> %hop = sub <8 x i16> %a0, %a1 @@ -960,6 +1117,13 @@ define <16 x i16> @hsub_v16i16b(<16 x i16> %a) { ; AVX2: # %bb.0: ; AVX2-NEXT: vphsubw %ymm0, %ymm0, %ymm0 ; AVX2-NEXT: retq +; CHECK-LABEL: define <16 x i16> @hsub_v16i16b( +; CHECK-SAME: <16 x i16> [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> poison, <16 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> poison, <16 x i32> +; CHECK-NEXT: [[SHUF:%.*]] = sub <16 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <16 x i16> [[SHUF]] +; %a0 = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> %a1 = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> %hop = sub <16 x i16> %a0, %a1 @@ -985,6 +1149,12 @@ define <4 x float> @broadcast_haddps_v4f32(<4 x float> %a0) { ; AVX2-NEXT: vhaddps %xmm0, %xmm0, %xmm0 ; AVX2-NEXT: vbroadcastss %xmm0, %xmm0 ; AVX2-NEXT: retq +; CHECK-LABEL: define <4 x float> @broadcast_haddps_v4f32( +; CHECK-SAME: <4 x float> [[A0:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> [[A0]], <4 x float> [[A0]]) +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: ret <4 x float> [[TMP2]] +; %1 = tail call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %a0, <4 x float> %a0) %2 = shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> zeroinitializer ret <4 x float> %2 @@ -1002,6 +1172,13 @@ define <4 x float> @PR34724_1(<4 x float> %a, <4 x float> %b) { ; AVX: # %bb.0: ; AVX-NEXT: vhaddps %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <4 x float> @PR34724_1( +; CHECK-SAME: <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> +; CHECK-NEXT: [[VECINIT13:%.*]] = fadd <4 x float> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <4 x float> [[VECINIT13]] +; %t0 = shufflevector <4 x float> %a, <4 x float> %b, <2 x i32> %t1 = shufflevector <4 x float> %a, <4 x float> %b, <2 x i32> %t2 = fadd <2 x float> %t0, %t1 @@ -1022,6 +1199,13 @@ define <4 x float> @PR34724_2(<4 x float> %a, <4 x float> %b) { ; AVX: # %bb.0: ; AVX-NEXT: vhaddps %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <4 x float> @PR34724_2( +; CHECK-SAME: <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> +; CHECK-NEXT: [[VECINIT13:%.*]] = fadd <4 x float> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <4 x float> [[VECINIT13]] +; %t0 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> %t1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> %t2 = fadd <4 x float> %t0, %t1 @@ -1051,6 +1235,13 @@ define <4 x float> @hadd_4f32_v8f32_shuffle(<8 x float> %a0) { ; AVX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq +; CHECK-LABEL: define <4 x float> @hadd_4f32_v8f32_shuffle( +; CHECK-SAME: <8 x float> [[A0:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[HADD0:%.*]] = shufflevector <8 x float> [[A0]], <8 x float> poison, <4 x i32> +; CHECK-NEXT: [[HADD1:%.*]] = shufflevector <8 x float> [[A0]], <8 x float> poison, <4 x i32> +; CHECK-NEXT: [[HADD:%.*]] = fadd <4 x float> [[HADD0]], [[HADD1]] +; CHECK-NEXT: ret <4 x float> [[HADD]] +; %shuf256 = shufflevector <8 x float> %a0, <8 x float> undef, <8 x i32> %lo = shufflevector <8 x float> %shuf256, <8 x float> undef, <4 x i32> %hi = shufflevector <8 x float> %shuf256, <8 x float> undef, <4 x i32> @@ -1074,6 +1265,13 @@ define <4 x float> @hsub_4f32_v8f32_shuffle(<8 x float> %a0) { ; AVX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq +; CHECK-LABEL: define <4 x float> @hsub_4f32_v8f32_shuffle( +; CHECK-SAME: <8 x float> [[A0:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[HSUB0:%.*]] = shufflevector <8 x float> [[A0]], <8 x float> poison, <4 x i32> +; CHECK-NEXT: [[HSUB1:%.*]] = shufflevector <8 x float> [[A0]], <8 x float> poison, <4 x i32> +; CHECK-NEXT: [[HSUB:%.*]] = fadd <4 x float> [[HSUB0]], [[HSUB1]] +; CHECK-NEXT: ret <4 x float> [[HSUB]] +; %shuf256 = shufflevector <8 x float> %a0, <8 x float> undef, <8 x i32> %lo = shufflevector <8 x float> %shuf256, <8 x float> undef, <4 x i32> %hi = shufflevector <8 x float> %shuf256, <8 x float> undef, <4 x i32> @@ -1113,6 +1311,13 @@ define <4 x i32> @hadd_4i32_v8i32_shuffle(<8 x i32> %a0) { ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq +; CHECK-LABEL: define <4 x i32> @hadd_4i32_v8i32_shuffle( +; CHECK-SAME: <8 x i32> [[A0:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[HADD0:%.*]] = shufflevector <8 x i32> [[A0]], <8 x i32> poison, <4 x i32> +; CHECK-NEXT: [[HADD1:%.*]] = shufflevector <8 x i32> [[A0]], <8 x i32> poison, <4 x i32> +; CHECK-NEXT: [[HADD:%.*]] = add <4 x i32> [[HADD0]], [[HADD1]] +; CHECK-NEXT: ret <4 x i32> [[HADD]] +; %shuf256 = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> %lo = shufflevector <8 x i32> %shuf256, <8 x i32> undef, <4 x i32> %hi = shufflevector <8 x i32> %shuf256, <8 x i32> undef, <4 x i32> @@ -1152,6 +1357,13 @@ define <4 x i32> @hsub_4i32_v8i32_shuffle(<8 x i32> %a0) { ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq +; CHECK-LABEL: define <4 x i32> @hsub_4i32_v8i32_shuffle( +; CHECK-SAME: <8 x i32> [[A0:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[HSUB0:%.*]] = shufflevector <8 x i32> [[A0]], <8 x i32> poison, <4 x i32> +; CHECK-NEXT: [[HSUB1:%.*]] = shufflevector <8 x i32> [[A0]], <8 x i32> poison, <4 x i32> +; CHECK-NEXT: [[HSUB:%.*]] = add <4 x i32> [[HSUB0]], [[HSUB1]] +; CHECK-NEXT: ret <4 x i32> [[HSUB]] +; %shuf256 = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> %lo = shufflevector <8 x i32> %shuf256, <8 x i32> undef, <4 x i32> %hi = shufflevector <8 x i32> %shuf256, <8 x i32> undef, <4 x i32> @@ -1185,6 +1397,13 @@ define <4 x double> @hadd_4f64_v4f64_shuffle(<4 x double> %a0, <4 x double> %a1) ; AVX2-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3] ; AVX2-NEXT: retq +; CHECK-LABEL: define <4 x double> @hadd_4f64_v4f64_shuffle( +; CHECK-SAME: <4 x double> [[A0:%.*]], <4 x double> [[A1:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[HADD0:%.*]] = shufflevector <4 x double> [[A0]], <4 x double> [[A1]], <4 x i32> +; CHECK-NEXT: [[HADD1:%.*]] = shufflevector <4 x double> [[A0]], <4 x double> [[A1]], <4 x i32> +; CHECK-NEXT: [[HADD:%.*]] = fadd <4 x double> [[HADD0]], [[HADD1]] +; CHECK-NEXT: ret <4 x double> [[HADD]] +; %shuf0 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> %shuf1 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> %hadd0 = shufflevector <4 x double> %shuf0, <4 x double> %shuf1, <4 x i32> @@ -1213,6 +1432,13 @@ define <4 x double> @hsub_4f64_v4f64_shuffle(<4 x double> %a0, <4 x double> %a1) ; AVX2-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3] ; AVX2-NEXT: retq +; CHECK-LABEL: define <4 x double> @hsub_4f64_v4f64_shuffle( +; CHECK-SAME: <4 x double> [[A0:%.*]], <4 x double> [[A1:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[HADD0:%.*]] = shufflevector <4 x double> [[A0]], <4 x double> [[A1]], <4 x i32> +; CHECK-NEXT: [[HADD1:%.*]] = shufflevector <4 x double> [[A0]], <4 x double> [[A1]], <4 x i32> +; CHECK-NEXT: [[HADD:%.*]] = fsub <4 x double> [[HADD0]], [[HADD1]] +; CHECK-NEXT: ret <4 x double> [[HADD]] +; %shuf0 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> %shuf1 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> %hadd0 = shufflevector <4 x double> %shuf0, <4 x double> %shuf1, <4 x i32> @@ -1241,6 +1467,13 @@ define <8 x float> @hadd_8f32_v8f32_shuffle(<8 x float> %a0, <8 x float> %a1) { ; AVX2-NEXT: vhaddps %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3] ; AVX2-NEXT: retq +; CHECK-LABEL: define <8 x float> @hadd_8f32_v8f32_shuffle( +; CHECK-SAME: <8 x float> [[A0:%.*]], <8 x float> [[A1:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[HADD0:%.*]] = shufflevector <8 x float> [[A0]], <8 x float> [[A1]], <8 x i32> +; CHECK-NEXT: [[HADD1:%.*]] = shufflevector <8 x float> [[A0]], <8 x float> [[A1]], <8 x i32> +; CHECK-NEXT: [[HADD:%.*]] = fadd <8 x float> [[HADD0]], [[HADD1]] +; CHECK-NEXT: ret <8 x float> [[HADD]] +; %shuf0 = shufflevector <8 x float> %a0, <8 x float> %a1, <8 x i32> %shuf1 = shufflevector <8 x float> %a0, <8 x float> %a1, <8 x i32> %hadd0 = shufflevector <8 x float> %shuf0, <8 x float> %shuf1, <8 x i32> @@ -1269,6 +1502,13 @@ define <8 x float> @hsub_8f32_v8f32_shuffle(<8 x float> %a0, <8 x float> %a1) { ; AVX2-NEXT: vhaddps %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3] ; AVX2-NEXT: retq +; CHECK-LABEL: define <8 x float> @hsub_8f32_v8f32_shuffle( +; CHECK-SAME: <8 x float> [[A0:%.*]], <8 x float> [[A1:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[HSUB0:%.*]] = shufflevector <8 x float> [[A0]], <8 x float> [[A1]], <8 x i32> +; CHECK-NEXT: [[HSUB1:%.*]] = shufflevector <8 x float> [[A0]], <8 x float> [[A1]], <8 x i32> +; CHECK-NEXT: [[HSUB:%.*]] = fadd <8 x float> [[HSUB0]], [[HSUB1]] +; CHECK-NEXT: ret <8 x float> [[HSUB]] +; %shuf0 = shufflevector <8 x float> %a0, <8 x float> %a1, <8 x i32> %shuf1 = shufflevector <8 x float> %a0, <8 x float> %a1, <8 x i32> %hsub0 = shufflevector <8 x float> %shuf0, <8 x float> %shuf1, <8 x i32> @@ -1312,6 +1552,13 @@ define <8 x i32> @hadd_8i32_v8i32_shuffle(<8 x i32> %a0, <8 x i32> %a1) { ; AVX2-NEXT: vphaddd %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] ; AVX2-NEXT: retq +; CHECK-LABEL: define <8 x i32> @hadd_8i32_v8i32_shuffle( +; CHECK-SAME: <8 x i32> [[A0:%.*]], <8 x i32> [[A1:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[HADD0:%.*]] = shufflevector <8 x i32> [[A0]], <8 x i32> [[A1]], <8 x i32> +; CHECK-NEXT: [[HADD1:%.*]] = shufflevector <8 x i32> [[A0]], <8 x i32> [[A1]], <8 x i32> +; CHECK-NEXT: [[HADD:%.*]] = add <8 x i32> [[HADD0]], [[HADD1]] +; CHECK-NEXT: ret <8 x i32> [[HADD]] +; %shuf0 = shufflevector <8 x i32> %a0, <8 x i32> %a1, <8 x i32> %shuf1 = shufflevector <8 x i32> %a0, <8 x i32> %a1, <8 x i32> %hadd0 = shufflevector <8 x i32> %shuf0, <8 x i32> %shuf1, <8 x i32> @@ -1356,6 +1603,13 @@ define <8 x i32> @hsub_8i32_v8i32_shuffle(<8 x i32> %a0, <8 x i32> %a1) { ; AVX2-NEXT: vphsubd %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] ; AVX2-NEXT: retq +; CHECK-LABEL: define <8 x i32> @hsub_8i32_v8i32_shuffle( +; CHECK-SAME: <8 x i32> [[A0:%.*]], <8 x i32> [[A1:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[HADD0:%.*]] = shufflevector <8 x i32> [[A0]], <8 x i32> [[A1]], <8 x i32> +; CHECK-NEXT: [[HADD1:%.*]] = shufflevector <8 x i32> [[A0]], <8 x i32> [[A1]], <8 x i32> +; CHECK-NEXT: [[HADD:%.*]] = sub <8 x i32> [[HADD0]], [[HADD1]] +; CHECK-NEXT: ret <8 x i32> [[HADD]] +; %shuf0 = shufflevector <8 x i32> %a0, <8 x i32> %a1, <8 x i32> %shuf1 = shufflevector <8 x i32> %a0, <8 x i32> %a1, <8 x i32> %hadd0 = shufflevector <8 x i32> %shuf0, <8 x i32> %shuf1, <8 x i32> @@ -1413,6 +1667,13 @@ define <16 x i16> @hadd_16i16_16i16_shuffle(<16 x i16> %a0, <16 x i16> %a1) { ; AVX2-NEXT: vphaddw %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] ; AVX2-NEXT: retq +; CHECK-LABEL: define <16 x i16> @hadd_16i16_16i16_shuffle( +; CHECK-SAME: <16 x i16> [[A0:%.*]], <16 x i16> [[A1:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[HADD0:%.*]] = shufflevector <16 x i16> [[A0]], <16 x i16> [[A1]], <16 x i32> +; CHECK-NEXT: [[HADD1:%.*]] = shufflevector <16 x i16> [[A0]], <16 x i16> [[A1]], <16 x i32> +; CHECK-NEXT: [[HADD:%.*]] = add <16 x i16> [[HADD0]], [[HADD1]] +; CHECK-NEXT: ret <16 x i16> [[HADD]] +; %shuf0 = shufflevector <16 x i16> %a0, <16 x i16> %a1, <16 x i32> %shuf1 = shufflevector <16 x i16> %a0, <16 x i16> %a1, <16 x i32> %hadd0 = shufflevector <16 x i16> %shuf0, <16 x i16> %shuf1, <16 x i32> diff --git a/llvm/test/CodeGen/X86/haddsub-undef.ll b/llvm/test/Transforms/PhaseOrdering/X86/haddsub-undef.ll similarity index 57% rename from llvm/test/CodeGen/X86/haddsub-undef.ll rename to llvm/test/Transforms/PhaseOrdering/X86/haddsub-undef.ll index 94fa81742ba71..678b0a10717ac 100644 --- a/llvm/test/CodeGen/X86/haddsub-undef.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/haddsub-undef.ll @@ -1,12 +1,5 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse3 | FileCheck %s --check-prefixes=SSE,SSE-SLOW -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse3,fast-hops | FileCheck %s --check-prefixes=SSE,SSE-FAST -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX-SLOW,AVX1-SLOW -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx,fast-hops | FileCheck %s --check-prefixes=AVX,AVX-FAST,AVX1-FAST -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVX-SLOW,AVX512,AVX512-SLOW -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f,fast-hops | FileCheck %s --check-prefixes=AVX,AVX-FAST,AVX512,AVX512-FAST - -; Verify that we correctly fold horizontal binop even in the presence of UNDEFs. +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -passes="default" -S %s | FileCheck %s define <4 x float> @test1_undef(<4 x float> %a, <4 x float> %b) { ; SSE-LABEL: test1_undef: @@ -18,6 +11,19 @@ define <4 x float> @test1_undef(<4 x float> %a, <4 x float> %b) { ; AVX: # %bb.0: ; AVX-NEXT: vhaddps %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <4 x float> @test1_undef( +; CHECK-SAME: <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]] +; CHECK-NEXT: [[VECINIT3:%.*]] = insertelement <4 x float> [[TMP1]], float undef, i64 2 +; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[A]], [[SHIFT1]] +; CHECK-NEXT: [[VECINIT5:%.*]] = shufflevector <4 x float> [[VECINIT3]], <4 x float> [[TMP2]], <4 x i32> +; CHECK-NEXT: [[SHIFT2:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[SHIFT2]], [[B]] +; CHECK-NEXT: [[VECINIT13:%.*]] = shufflevector <4 x float> [[VECINIT5]], <4 x float> [[TMP3]], <4 x i32> +; CHECK-NEXT: ret <4 x float> [[VECINIT13]] +; %vecext = extractelement <4 x float> %a, i32 0 %vecext1 = extractelement <4 x float> %a, i32 1 %add = fadd float %vecext, %vecext1 @@ -43,6 +49,19 @@ define <4 x float> @test2_undef(<4 x float> %a, <4 x float> %b) { ; AVX: # %bb.0: ; AVX-NEXT: vhaddps %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <4 x float> @test2_undef( +; CHECK-SAME: <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]] +; CHECK-NEXT: [[VECINIT3:%.*]] = insertelement <4 x float> [[TMP1]], float undef, i64 1 +; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[B]], [[SHIFT1]] +; CHECK-NEXT: [[VECINIT9:%.*]] = shufflevector <4 x float> [[VECINIT3]], <4 x float> [[TMP2]], <4 x i32> +; CHECK-NEXT: [[SHIFT2:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[SHIFT2]], [[B]] +; CHECK-NEXT: [[VECINIT13:%.*]] = shufflevector <4 x float> [[VECINIT9]], <4 x float> [[TMP3]], <4 x i32> +; CHECK-NEXT: ret <4 x float> [[VECINIT13]] +; %vecext = extractelement <4 x float> %a, i32 0 %vecext1 = extractelement <4 x float> %a, i32 1 %add = fadd float %vecext, %vecext1 @@ -68,6 +87,19 @@ define <4 x float> @test3_undef(<4 x float> %a, <4 x float> %b) { ; AVX: # %bb.0: ; AVX-NEXT: vhaddps %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <4 x float> @test3_undef( +; CHECK-SAME: <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]] +; CHECK-NEXT: [[VECINIT3:%.*]] = insertelement <4 x float> [[TMP1]], float undef, i64 3 +; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[A]], [[SHIFT1]] +; CHECK-NEXT: [[VECINIT5:%.*]] = shufflevector <4 x float> [[VECINIT3]], <4 x float> [[TMP2]], <4 x i32> +; CHECK-NEXT: [[SHIFT2:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[B]], [[SHIFT2]] +; CHECK-NEXT: [[VECINIT9:%.*]] = shufflevector <4 x float> [[VECINIT5]], <4 x float> [[TMP3]], <4 x i32> +; CHECK-NEXT: ret <4 x float> [[VECINIT9]] +; %vecext = extractelement <4 x float> %a, i32 0 %vecext1 = extractelement <4 x float> %a, i32 1 %add = fadd float %vecext, %vecext1 @@ -105,6 +137,13 @@ define <4 x float> @test4_undef(<4 x float> %a, <4 x float> %b) { ; AVX-FAST: # %bb.0: ; AVX-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm0 ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define <4 x float> @test4_undef( +; CHECK-SAME: <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]] +; CHECK-NEXT: [[VECINIT:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> , <4 x i32> +; CHECK-NEXT: ret <4 x float> [[VECINIT]] +; %vecext = extractelement <4 x float> %a, i32 0 %vecext1 = extractelement <4 x float> %a, i32 1 %add = fadd float %vecext, %vecext1 @@ -135,6 +174,13 @@ define <2 x double> @test5_undef(<2 x double> %a, <2 x double> %b) { ; AVX-FAST: # %bb.0: ; AVX-FAST-NEXT: vhaddpd %xmm0, %xmm0, %xmm0 ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define <2 x double> @test5_undef( +; CHECK-SAME: <2 x double> [[A:%.*]], <2 x double> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <2 x double> [[A]], <2 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fadd <2 x double> [[A]], [[SHIFT]] +; CHECK-NEXT: [[VECINIT1:%.*]] = insertelement <2 x double> [[TMP1]], double undef, i64 1 +; CHECK-NEXT: ret <2 x double> [[VECINIT1]] +; %vecext = extractelement <2 x double> %a, i32 0 %vecext1 = extractelement <2 x double> %a, i32 1 %add = fadd double %vecext, %vecext1 @@ -152,6 +198,16 @@ define <4 x float> @test6_undef(<4 x float> %a, <4 x float> %b) { ; AVX: # %bb.0: ; AVX-NEXT: vhaddps %xmm0, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <4 x float> @test6_undef( +; CHECK-SAME: <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]] +; CHECK-NEXT: [[VECINIT:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> , <4 x i32> +; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[A]], [[SHIFT1]] +; CHECK-NEXT: [[VECINIT5:%.*]] = shufflevector <4 x float> [[VECINIT]], <4 x float> [[TMP2]], <4 x i32> +; CHECK-NEXT: ret <4 x float> [[VECINIT5]] +; %vecext = extractelement <4 x float> %a, i32 0 %vecext1 = extractelement <4 x float> %a, i32 1 %add = fadd float %vecext, %vecext1 @@ -173,6 +229,16 @@ define <4 x float> @test7_undef(<4 x float> %a, <4 x float> %b) { ; AVX: # %bb.0: ; AVX-NEXT: vhaddps %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <4 x float> @test7_undef( +; CHECK-SAME: <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[B]], [[SHIFT]] +; CHECK-NEXT: [[VECINIT:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> , <4 x i32> +; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[SHIFT1]], [[B]] +; CHECK-NEXT: [[VECINIT5:%.*]] = shufflevector <4 x float> [[VECINIT]], <4 x float> [[TMP2]], <4 x i32> +; CHECK-NEXT: ret <4 x float> [[VECINIT5]] +; %vecext = extractelement <4 x float> %b, i32 0 %vecext1 = extractelement <4 x float> %b, i32 1 %add = fadd float %vecext, %vecext1 @@ -218,6 +284,16 @@ define <4 x float> @test8_undef(<4 x float> %a, <4 x float> %b) { ; AVX-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm0 ; AVX-FAST-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1,1,3] ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define <4 x float> @test8_undef( +; CHECK-SAME: <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]] +; CHECK-NEXT: [[VECINIT:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> , <4 x i32> +; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[A]], [[SHIFT1]] +; CHECK-NEXT: [[VECINIT5:%.*]] = shufflevector <4 x float> [[VECINIT]], <4 x float> [[TMP2]], <4 x i32> +; CHECK-NEXT: ret <4 x float> [[VECINIT5]] +; %vecext = extractelement <4 x float> %a, i32 0 %vecext1 = extractelement <4 x float> %a, i32 1 %add = fadd float %vecext, %vecext1 @@ -239,6 +315,16 @@ define <4 x float> @test9_undef(<4 x float> %a, <4 x float> %b) { ; AVX: # %bb.0: ; AVX-NEXT: vhaddps %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <4 x float> @test9_undef( +; CHECK-SAME: <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]] +; CHECK-NEXT: [[VECINIT:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> , <4 x i32> +; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[SHIFT1]], [[B]] +; CHECK-NEXT: [[VECINIT5:%.*]] = shufflevector <4 x float> [[VECINIT]], <4 x float> [[TMP2]], <4 x i32> +; CHECK-NEXT: ret <4 x float> [[VECINIT5]] +; %vecext = extractelement <4 x float> %a, i32 0 %vecext1 = extractelement <4 x float> %a, i32 1 %add = fadd float %vecext, %vecext1 @@ -260,6 +346,16 @@ define <8 x float> @test10_undef(<8 x float> %a, <8 x float> %b) { ; AVX: # %bb.0: ; AVX-NEXT: vhaddps %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <8 x float> @test10_undef( +; CHECK-SAME: <8 x float> [[A:%.*]], <8 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fadd <8 x float> [[A]], [[SHIFT]] +; CHECK-NEXT: [[VECINIT:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> , <8 x i32> +; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <8 x float> [[B]], <8 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = fadd <8 x float> [[SHIFT1]], [[B]] +; CHECK-NEXT: [[VECINIT5:%.*]] = shufflevector <8 x float> [[VECINIT]], <8 x float> [[TMP2]], <8 x i32> +; CHECK-NEXT: ret <8 x float> [[VECINIT5]] +; %vecext = extractelement <8 x float> %a, i32 0 %vecext1 = extractelement <8 x float> %a, i32 1 %add = fadd float %vecext, %vecext1 @@ -292,6 +388,16 @@ define <8 x float> @test11_undef(<8 x float> %a, <8 x float> %b) { ; AVX: # %bb.0: ; AVX-NEXT: vhaddps %ymm1, %ymm0, %ymm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <8 x float> @test11_undef( +; CHECK-SAME: <8 x float> [[A:%.*]], <8 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fadd <8 x float> [[A]], [[SHIFT]] +; CHECK-NEXT: [[VECINIT:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> , <8 x i32> +; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <8 x float> [[B]], <8 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = fadd <8 x float> [[B]], [[SHIFT1]] +; CHECK-NEXT: [[VECINIT5:%.*]] = shufflevector <8 x float> [[VECINIT]], <8 x float> [[TMP2]], <8 x i32> +; CHECK-NEXT: ret <8 x float> [[VECINIT5]] +; %vecext = extractelement <8 x float> %a, i32 0 %vecext1 = extractelement <8 x float> %a, i32 1 %add = fadd float %vecext, %vecext1 @@ -313,6 +419,16 @@ define <8 x float> @test12_undef(<8 x float> %a, <8 x float> %b) { ; AVX: # %bb.0: ; AVX-NEXT: vhaddps %xmm0, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <8 x float> @test12_undef( +; CHECK-SAME: <8 x float> [[A:%.*]], <8 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fadd <8 x float> [[A]], [[SHIFT]] +; CHECK-NEXT: [[VECINIT:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> , <8 x i32> +; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = fadd <8 x float> [[A]], [[SHIFT1]] +; CHECK-NEXT: [[VECINIT5:%.*]] = shufflevector <8 x float> [[VECINIT]], <8 x float> [[TMP2]], <8 x i32> +; CHECK-NEXT: ret <8 x float> [[VECINIT5]] +; %vecext = extractelement <8 x float> %a, i32 0 %vecext1 = extractelement <8 x float> %a, i32 1 %add = fadd float %vecext, %vecext1 @@ -335,6 +451,14 @@ define <8 x float> @test13_undef(<8 x float> %a, <8 x float> %b) { ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 ; AVX-NEXT: vhaddps %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <8 x float> @test13_undef( +; CHECK-SAME: <8 x float> [[A:%.*]], <8 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> undef, <8 x i32> +; CHECK-NEXT: ret <8 x float> [[TMP4]] +; %vecext = extractelement <8 x float> %a, i32 0 %vecext1 = extractelement <8 x float> %a, i32 1 %add1 = fadd float %vecext, %vecext1 @@ -389,6 +513,14 @@ define <16 x float> @test13_v16f32_undef(<16 x float> %a, <16 x float> %b) { ; AVX512-SLOW-NEXT: vaddss %xmm0, %xmm2, %xmm0 ; AVX512-SLOW-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] ; AVX512-SLOW-NEXT: retq +; CHECK-LABEL: define <16 x float> @test13_v16f32_undef( +; CHECK-SAME: <16 x float> [[A:%.*]], <16 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x float> [[A]], <16 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x float> [[A]], <16 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> undef, <16 x i32> +; CHECK-NEXT: ret <16 x float> [[TMP4]] +; %vecext = extractelement <16 x float> %a, i32 0 %vecext1 = extractelement <16 x float> %a, i32 1 %add1 = fadd float %vecext, %vecext1 @@ -429,6 +561,12 @@ define <2 x double> @add_pd_003(<2 x double> %x) { ; AVX-FAST: # %bb.0: ; AVX-FAST-NEXT: vhaddpd %xmm0, %xmm0, %xmm0 ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define <2 x double> @add_pd_003( +; CHECK-SAME: <2 x double> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[L:%.*]] = shufflevector <2 x double> [[X]], <2 x double> poison, <2 x i32> +; CHECK-NEXT: [[ADD:%.*]] = fadd <2 x double> [[X]], [[L]] +; CHECK-NEXT: ret <2 x double> [[ADD]] +; %l = shufflevector <2 x double> %x, <2 x double> undef, <2 x i32> %add = fadd <2 x double> %l, %x ret <2 x double> %add @@ -459,6 +597,12 @@ define <2 x double> @add_pd_003_2(<2 x double> %x) { ; AVX-FAST: # %bb.0: ; AVX-FAST-NEXT: vhaddpd %xmm0, %xmm0, %xmm0 ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define <2 x double> @add_pd_003_2( +; CHECK-SAME: <2 x double> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[L:%.*]] = shufflevector <2 x double> [[X]], <2 x double> poison, <2 x i32> +; CHECK-NEXT: [[ADD:%.*]] = fadd <2 x double> [[X]], [[L]] +; CHECK-NEXT: ret <2 x double> [[ADD]] +; %l = shufflevector <2 x double> %x, <2 x double> undef, <2 x i32> %add = fadd <2 x double> %l, %x ret <2 x double> %add @@ -481,6 +625,12 @@ define <2 x double> @add_pd_010(<2 x double> %x) { ; AVX-FAST: # %bb.0: ; AVX-FAST-NEXT: vhaddpd %xmm0, %xmm0, %xmm0 ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define <2 x double> @add_pd_010( +; CHECK-SAME: <2 x double> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[X]], <2 x double> poison, <2 x i32> +; CHECK-NEXT: [[SHUFFLE2:%.*]] = fadd <2 x double> [[TMP1]], [[X]] +; CHECK-NEXT: ret <2 x double> [[SHUFFLE2]] +; %l = shufflevector <2 x double> %x, <2 x double> undef, <2 x i32> %add = fadd <2 x double> %l, %x %shuffle2 = shufflevector <2 x double> %add, <2 x double> undef, <2 x i32> @@ -497,6 +647,13 @@ define <4 x float> @add_ps_007(<4 x float> %x) { ; AVX: # %bb.0: ; AVX-NEXT: vhaddps %xmm0, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <4 x float> @add_ps_007( +; CHECK-SAME: <4 x float> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[L:%.*]] = shufflevector <4 x float> [[X]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[X]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[ADD:%.*]] = fadd <4 x float> [[L]], [[R]] +; CHECK-NEXT: ret <4 x float> [[ADD]] +; %l = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> %r = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> %add = fadd <4 x float> %l, %r @@ -530,6 +687,13 @@ define <4 x float> @add_ps_030(<4 x float> %x) { ; AVX-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm0 ; AVX-FAST-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,0,2,3] ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define <4 x float> @add_ps_030( +; CHECK-SAME: <4 x float> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[X]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[X]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[SHUFFLE2:%.*]] = fadd <4 x float> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <4 x float> [[SHUFFLE2]] +; %l = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> %r = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> %add = fadd <4 x float> %l, %r @@ -547,6 +711,13 @@ define <4 x float> @add_ps_007_2(<4 x float> %x) { ; AVX: # %bb.0: ; AVX-NEXT: vhaddps %xmm0, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <4 x float> @add_ps_007_2( +; CHECK-SAME: <4 x float> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[L:%.*]] = shufflevector <4 x float> [[X]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[X]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[ADD:%.*]] = fadd <4 x float> [[L]], [[R]] +; CHECK-NEXT: ret <4 x float> [[ADD]] +; %l = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> %r = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> %add = fadd <4 x float> %l, %r @@ -575,6 +746,12 @@ define <4 x float> @add_ps_008(<4 x float> %x) { ; AVX-FAST: # %bb.0: ; AVX-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm0 ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define <4 x float> @add_ps_008( +; CHECK-SAME: <4 x float> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[L:%.*]] = shufflevector <4 x float> [[X]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[ADD:%.*]] = fadd <4 x float> [[X]], [[L]] +; CHECK-NEXT: ret <4 x float> [[ADD]] +; %l = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> %add = fadd <4 x float> %l, %x ret <4 x float> %add @@ -593,6 +770,13 @@ define <4 x float> @add_ps_016(<4 x float> %0, <4 x float> %1) { ; AVX-NEXT: vhaddps %xmm0, %xmm1, %xmm0 ; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,0,3,3] ; AVX-NEXT: retq +; CHECK-LABEL: define <4 x float> @add_ps_016( +; CHECK-SAME: <4 x float> [[TMP0:%.*]], <4 x float> [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP0]], <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP0]], <4 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = fadd <4 x float> [[TMP3]], [[TMP4]] +; CHECK-NEXT: ret <4 x float> [[TMP5]] +; %3 = shufflevector <4 x float> %1, <4 x float> %0, <2 x i32> %4 = shufflevector <4 x float> %1, <4 x float> %0, <2 x i32> %5 = fadd <2 x float> %3, %4 @@ -630,6 +814,13 @@ define <4 x float> @add_ps_017(<4 x float> %x) { ; AVX-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm0 ; AVX-FAST-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define <4 x float> @add_ps_017( +; CHECK-SAME: <4 x float> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[X]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[X]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[SHUFFLE2:%.*]] = fadd <4 x float> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <4 x float> [[SHUFFLE2]] +; %l = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> %add = fadd <4 x float> %l, %x %shuffle2 = shufflevector <4 x float> %add, <4 x float> undef, <4 x i32> @@ -660,6 +851,12 @@ define <4 x float> @add_ps_018(<4 x float> %x) { ; AVX512-NEXT: vhaddps %xmm0, %xmm0, %xmm0 ; AVX512-NEXT: vbroadcastss %xmm0, %xmm0 ; AVX512-NEXT: retq +; CHECK-LABEL: define <4 x float> @add_ps_018( +; CHECK-SAME: <4 x float> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[X]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[SHUFFLE2:%.*]] = fadd <4 x float> [[TMP1]], [[X]] +; CHECK-NEXT: ret <4 x float> [[SHUFFLE2]] +; %l = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> %r = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> %add = fadd <4 x float> %l, %r @@ -704,6 +901,13 @@ define <4 x double> @add_pd_011(<4 x double> %0, <4 x double> %1) { ; AVX512-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,3] ; AVX512-NEXT: retq +; CHECK-LABEL: define <4 x double> @add_pd_011( +; CHECK-SAME: <4 x double> [[TMP0:%.*]], <4 x double> [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x double> [[TMP0]], <4 x double> [[TMP1]], <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x double> [[TMP0]], <4 x double> [[TMP1]], <4 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = fadd <4 x double> [[TMP3]], [[TMP4]] +; CHECK-NEXT: ret <4 x double> [[TMP5]] +; %3 = shufflevector <4 x double> %0, <4 x double> %1, <4 x i32> %4 = shufflevector <4 x double> %0, <4 x double> %1, <4 x i32> %5 = fadd <4 x double> %3, %4 @@ -722,6 +926,18 @@ define <4 x float> @v8f32_inputs_v4f32_output_0101(<8 x float> %a, <8 x float> % ; AVX-NEXT: vhaddps %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq +; CHECK-LABEL: define <4 x float> @v8f32_inputs_v4f32_output_0101( +; CHECK-SAME: <8 x float> [[A:%.*]], <8 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[R0:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> , <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[B]], <8 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[B]], <8 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = fadd <4 x float> [[TMP4]], [[TMP5]] +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[R0]], <4 x float> [[TMP6]], <4 x i32> +; CHECK-NEXT: ret <4 x float> [[R]] +; %a0 = extractelement <8 x float> %a, i32 0 %a1 = extractelement <8 x float> %a, i32 1 %b0 = extractelement <8 x float> %b, i32 0 @@ -744,6 +960,17 @@ define <4 x float> @v8f32_input0_v4f32_output_0123(<8 x float> %a, <4 x float> % ; AVX-NEXT: vhaddps %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq +; CHECK-LABEL: define <4 x float> @v8f32_input0_v4f32_output_0123( +; CHECK-SAME: <8 x float> [[A:%.*]], <4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[SHIFT1]], [[B]] +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = fadd <4 x float> [[TMP2]], [[TMP3]] +; CHECK-NEXT: [[R0:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> , <4 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[R0]], <4 x float> [[TMP1]], <4 x i32> +; CHECK-NEXT: ret <4 x float> [[R]] +; %a0 = extractelement <8 x float> %a, i32 0 %a1 = extractelement <8 x float> %a, i32 1 %b2 = extractelement <4 x float> %b, i32 2 @@ -766,6 +993,17 @@ define <4 x float> @v8f32_input1_v4f32_output_2301(<4 x float> %a, <8 x float> % ; AVX-NEXT: vhaddps %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq +; CHECK-LABEL: define <4 x float> @v8f32_input1_v4f32_output_2301( +; CHECK-SAME: <4 x float> [[A:%.*]], <8 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]] +; CHECK-NEXT: [[R1:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> , <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[B]], <8 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[B]], <8 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = fadd <4 x float> [[TMP2]], [[TMP3]] +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[R1]], <4 x float> [[TMP4]], <4 x i32> +; CHECK-NEXT: ret <4 x float> [[R]] +; %a2 = extractelement <4 x float> %a, i32 2 %a3 = extractelement <4 x float> %a, i32 3 %b0 = extractelement <8 x float> %b, i32 0 @@ -788,6 +1026,18 @@ define <4 x float> @v8f32_inputs_v4f32_output_2323(<8 x float> %a, <8 x float> % ; AVX-NEXT: vhaddps %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq +; CHECK-LABEL: define <4 x float> @v8f32_inputs_v4f32_output_2323( +; CHECK-SAME: <8 x float> [[A:%.*]], <8 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[R1:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> , <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[B]], <8 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[B]], <8 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = fadd <4 x float> [[TMP4]], [[TMP5]] +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[R1]], <4 x float> [[TMP6]], <4 x i32> +; CHECK-NEXT: ret <4 x float> [[R]] +; %a2 = extractelement <8 x float> %a, i32 2 %a3 = extractelement <8 x float> %a, i32 3 %b2 = extractelement <8 x float> %b, i32 2 @@ -822,6 +1072,18 @@ define <4 x float> @v16f32_inputs_v4f32_output_0123(<16 x float> %a, <16 x float ; AVX512-NEXT: vhaddps %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq +; CHECK-LABEL: define <4 x float> @v16f32_inputs_v4f32_output_0123( +; CHECK-SAME: <16 x float> [[A:%.*]], <16 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x float> [[A]], <16 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x float> [[A]], <16 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[R0:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> , <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <16 x float> [[B]], <16 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <16 x float> [[B]], <16 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = fadd <4 x float> [[TMP4]], [[TMP5]] +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[R0]], <4 x float> [[TMP6]], <4 x i32> +; CHECK-NEXT: ret <4 x float> [[R]] +; %a0 = extractelement <16 x float> %a, i32 0 %a1 = extractelement <16 x float> %a, i32 1 %b2 = extractelement <16 x float> %b, i32 2 @@ -853,6 +1115,18 @@ define <8 x float> @v16f32_inputs_v8f32_output_4567(<16 x float> %a, <16 x float ; AVX512: # %bb.0: ; AVX512-NEXT: vhaddps %ymm1, %ymm0, %ymm0 ; AVX512-NEXT: retq +; CHECK-LABEL: define <8 x float> @v16f32_inputs_v8f32_output_4567( +; CHECK-SAME: <16 x float> [[A:%.*]], <16 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x float> [[A]], <16 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x float> [[A]], <16 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = fadd <8 x float> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[R4:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> , <8 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <16 x float> [[B]], <16 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <16 x float> [[B]], <16 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = fadd <8 x float> [[TMP4]], [[TMP5]] +; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x float> [[R4]], <8 x float> [[TMP6]], <8 x i32> +; CHECK-NEXT: ret <8 x float> [[R]] +; %a4 = extractelement <16 x float> %a, i32 4 %a5 = extractelement <16 x float> %a, i32 5 %b6 = extractelement <16 x float> %b, i32 6 @@ -874,6 +1148,16 @@ define <8 x float> @PR40243(<8 x float> %a, <8 x float> %b) { ; AVX: # %bb.0: ; AVX-NEXT: vhaddps %ymm1, %ymm0, %ymm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <8 x float> @PR40243( +; CHECK-SAME: <8 x float> [[A:%.*]], <8 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fadd <8 x float> [[A]], [[SHIFT]] +; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <8 x float> [[B]], <8 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = fadd <8 x float> [[SHIFT1]], [[B]] +; CHECK-NEXT: [[R4:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> , <8 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x float> [[R4]], <8 x float> [[TMP2]], <8 x i32> +; CHECK-NEXT: ret <8 x float> [[R]] +; %a4 = extractelement <8 x float> %a, i32 4 %a5 = extractelement <8 x float> %a, i32 5 %add4 = fadd float %a4, %a5 @@ -921,6 +1205,13 @@ define <4 x double> @PR44694(<4 x double> %0, <4 x double> %1) { ; AVX512-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 ; AVX512-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3] ; AVX512-NEXT: retq +; CHECK-LABEL: define <4 x double> @PR44694( +; CHECK-SAME: <4 x double> [[TMP0:%.*]], <4 x double> [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x double> [[TMP0]], <4 x double> [[TMP1]], <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x double> [[TMP0]], <4 x double> [[TMP1]], <4 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = fadd <4 x double> [[TMP3]], [[TMP4]] +; CHECK-NEXT: ret <4 x double> [[TMP5]] +; %3 = shufflevector <4 x double> %0, <4 x double> %1, <4 x i32> %4 = shufflevector <4 x double> %0, <4 x double> %1, <4 x i32> %5 = fadd <4 x double> %3, %4 @@ -952,6 +1243,13 @@ define <4 x float> @PR45747_1(<4 x float> %a, <4 x float> %b) nounwind { ; AVX-FAST: # %bb.0: ; AVX-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm0 ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define <4 x float> @PR45747_1( +; CHECK-SAME: <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[SHUFFLE:%.*]] = fadd <4 x float> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <4 x float> [[SHUFFLE]] +; %t0 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> %t1 = fadd <4 x float> %t0, %a %shuffle = shufflevector <4 x float> %t1, <4 x float> undef, <4 x i32> @@ -985,6 +1283,13 @@ define <4 x float> @PR45747_2(<4 x float> %a, <4 x float> %b) nounwind { ; AVX-FAST-NEXT: vhaddps %xmm1, %xmm1, %xmm0 ; AVX-FAST-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define <4 x float> @PR45747_2( +; CHECK-SAME: <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[SHUFFLE:%.*]] = fadd <4 x float> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <4 x float> [[SHUFFLE]] +; %t0 = shufflevector <4 x float> %b, <4 x float> undef, <4 x i32> %t1 = fadd <4 x float> %t0, %b %shuffle = shufflevector <4 x float> %t1, <4 x float> undef, <4 x i32> @@ -1001,6 +1306,13 @@ define <4 x float> @PR34724_add_v4f32_u123(<4 x float> %0, <4 x float> %1) { ; AVX: # %bb.0: ; AVX-NEXT: vhaddps %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <4 x float> @PR34724_add_v4f32_u123( +; CHECK-SAME: <4 x float> [[TMP0:%.*]], <4 x float> [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> [[TMP1]], <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> [[TMP1]], <4 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = fadd <4 x float> [[TMP3]], [[TMP4]] +; CHECK-NEXT: ret <4 x float> [[TMP5]] +; %3 = shufflevector <4 x float> %0, <4 x float> %1, <2 x i32> %4 = shufflevector <4 x float> %0, <4 x float> %1, <2 x i32> %5 = fadd <2 x float> %3, %4 @@ -1040,6 +1352,13 @@ define <4 x float> @PR34724_add_v4f32_0u23(<4 x float> %0, <4 x float> %1) { ; AVX-FAST: # %bb.0: ; AVX-FAST-NEXT: vhaddps %xmm1, %xmm0, %xmm0 ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define <4 x float> @PR34724_add_v4f32_0u23( +; CHECK-SAME: <4 x float> [[TMP0:%.*]], <4 x float> [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> [[TMP1]], <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> [[TMP1]], <4 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = fadd <4 x float> [[TMP3]], [[TMP4]] +; CHECK-NEXT: ret <4 x float> [[TMP5]] +; %3 = shufflevector <4 x float> %0, <4 x float> undef, <4 x i32> %4 = fadd <4 x float> %3, %0 %5 = shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> @@ -1061,6 +1380,13 @@ define <4 x float> @PR34724_add_v4f32_01u3(<4 x float> %0, <4 x float> %1) { ; AVX: # %bb.0: ; AVX-NEXT: vhaddps %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <4 x float> @PR34724_add_v4f32_01u3( +; CHECK-SAME: <4 x float> [[TMP0:%.*]], <4 x float> [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> [[TMP1]], <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> [[TMP1]], <4 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = fadd <4 x float> [[TMP3]], [[TMP4]] +; CHECK-NEXT: ret <4 x float> [[TMP5]] +; %3 = shufflevector <4 x float> %0, <4 x float> undef, <2 x i32> %4 = shufflevector <4 x float> %0, <4 x float> undef, <2 x i32> %5 = fadd <2 x float> %3, %4 @@ -1081,6 +1407,13 @@ define <4 x float> @PR34724_add_v4f32_012u(<4 x float> %0, <4 x float> %1) { ; AVX: # %bb.0: ; AVX-NEXT: vhaddps %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <4 x float> @PR34724_add_v4f32_012u( +; CHECK-SAME: <4 x float> [[TMP0:%.*]], <4 x float> [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> [[TMP1]], <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> [[TMP1]], <4 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = fadd <4 x float> [[TMP3]], [[TMP4]] +; CHECK-NEXT: ret <4 x float> [[TMP5]] +; %3 = shufflevector <4 x float> %0, <4 x float> undef, <2 x i32> %4 = shufflevector <4 x float> %0, <4 x float> undef, <2 x i32> %5 = fadd <2 x float> %3, %4 @@ -1129,6 +1462,20 @@ define <4 x double> @PR34724_add_v4f64_u123(<4 x double> %0, <4 x double> %1) { ; AVX-FAST-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 ; AVX-FAST-NEXT: vhaddpd %ymm0, %ymm1, %ymm0 ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define <4 x double> @PR34724_add_v4f64_u123( +; CHECK-SAME: <4 x double> [[TMP0:%.*]], <4 x double> [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x double> [[TMP0]], <4 x double> [[TMP1]], <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x double> [[TMP0]], <4 x double> [[TMP1]], <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x double> [[TMP6]], <4 x double> , <4 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x double> [[TMP7]], <4 x double> [[TMP8]], <4 x i32> +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = fadd <4 x double> [[SHIFT]], [[TMP1]] +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x double> [[TMP9]], <4 x double> [[TMP10]], <4 x i32> +; CHECK-NEXT: ret <4 x double> [[TMP11]] +; %3 = shufflevector <4 x double> %0, <4 x double> %1, <2 x i32> %4 = shufflevector <4 x double> %0, <4 x double> %1, <2 x i32> %5 = fadd <2 x double> %3, %4 @@ -1176,6 +1523,20 @@ define <4 x double> @PR34724_add_v4f64_0u23(<4 x double> %0, <4 x double> %1) { ; AVX-FAST-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX-FAST-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define <4 x double> @PR34724_add_v4f64_0u23( +; CHECK-SAME: <4 x double> [[TMP0:%.*]], <4 x double> [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x double> [[TMP0]], <4 x double> [[TMP1]], <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x double> [[TMP0]], <4 x double> [[TMP1]], <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x double> [[TMP6]], double undef, i64 1 +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x double> [[TMP7]], <4 x double> [[TMP8]], <4 x i32> +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = fadd <4 x double> [[SHIFT]], [[TMP1]] +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x double> [[TMP9]], <4 x double> [[TMP10]], <4 x i32> +; CHECK-NEXT: ret <4 x double> [[TMP11]] +; %3 = shufflevector <4 x double> %0, <4 x double> %1, <2 x i32> %4 = shufflevector <4 x double> %0, <4 x double> %1, <2 x i32> %5 = fadd <2 x double> %3, %4 @@ -1230,6 +1591,20 @@ define <4 x double> @PR34724_add_v4f64_01u3(<4 x double> %0, <4 x double> %1) { ; AVX512-FAST-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 ; AVX512-FAST-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,3,3] ; AVX512-FAST-NEXT: retq +; CHECK-LABEL: define <4 x double> @PR34724_add_v4f64_01u3( +; CHECK-SAME: <4 x double> [[TMP0:%.*]], <4 x double> [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x double> [[TMP0]], <4 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x double> [[TMP0]], <4 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x double> [[TMP6]], double undef, i64 2 +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x double> [[TMP7]], <4 x double> [[TMP8]], <4 x i32> +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = fadd <4 x double> [[SHIFT]], [[TMP1]] +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x double> [[TMP9]], <4 x double> [[TMP10]], <4 x i32> +; CHECK-NEXT: ret <4 x double> [[TMP11]] +; %3 = shufflevector <4 x double> %0, <4 x double> undef, <2 x i32> %4 = shufflevector <4 x double> %0, <4 x double> undef, <2 x i32> %5 = fadd <2 x double> %3, %4 @@ -1276,6 +1651,20 @@ define <4 x double> @PR34724_add_v4f64_012u(<4 x double> %0, <4 x double> %1) { ; AVX-FAST-NEXT: vhaddpd %xmm1, %xmm1, %xmm1 ; AVX-FAST-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define <4 x double> @PR34724_add_v4f64_012u( +; CHECK-SAME: <4 x double> [[TMP0:%.*]], <4 x double> [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x double> [[TMP0]], <4 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x double> [[TMP0]], <4 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x double> [[TMP6]], double undef, i64 3 +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x double> [[TMP7]], <4 x double> [[TMP8]], <4 x i32> +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = fadd <4 x double> [[TMP1]], [[SHIFT]] +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x double> [[TMP9]], <4 x double> [[TMP10]], <4 x i32> +; CHECK-NEXT: ret <4 x double> [[TMP11]] +; %3 = shufflevector <4 x double> %0, <4 x double> undef, <2 x i32> %4 = shufflevector <4 x double> %0, <4 x double> undef, <2 x i32> %5 = fadd <2 x double> %3, %4 diff --git a/llvm/test/CodeGen/X86/haddsub.ll b/llvm/test/Transforms/PhaseOrdering/X86/haddsub.ll similarity index 64% rename from llvm/test/CodeGen/X86/haddsub.ll rename to llvm/test/Transforms/PhaseOrdering/X86/haddsub.ll index a0778195b5c73..91289087689ef 100644 --- a/llvm/test/CodeGen/X86/haddsub.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/haddsub.ll @@ -1,12 +1,5 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse3 | FileCheck %s --check-prefixes=SSE3,SSE3-SLOW -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse3,fast-hops | FileCheck %s --check-prefixes=SSE3,SSE3-FAST -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX-SLOW -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx,fast-hops | FileCheck %s --check-prefixes=AVX,AVX-FAST -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX-SLOW -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2,fast-hops | FileCheck %s --check-prefixes=AVX,AVX-FAST -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVX-SLOW -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f,fast-hops | FileCheck %s --check-prefixes=AVX,AVX-FAST +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -passes="default" -S %s | FileCheck %s define <2 x double> @haddpd1(<2 x double> %x, <2 x double> %y) { ; SSE3-LABEL: haddpd1: @@ -18,6 +11,13 @@ define <2 x double> @haddpd1(<2 x double> %x, <2 x double> %y) { ; AVX: # %bb.0: ; AVX-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <2 x double> @haddpd1( +; CHECK-SAME: <2 x double> [[X:%.*]], <2 x double> [[Y:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[A:%.*]] = shufflevector <2 x double> [[X]], <2 x double> [[Y]], <2 x i32> +; CHECK-NEXT: [[B:%.*]] = shufflevector <2 x double> [[X]], <2 x double> [[Y]], <2 x i32> +; CHECK-NEXT: [[R:%.*]] = fadd <2 x double> [[A]], [[B]] +; CHECK-NEXT: ret <2 x double> [[R]] +; %a = shufflevector <2 x double> %x, <2 x double> %y, <2 x i32> %b = shufflevector <2 x double> %x, <2 x double> %y, <2 x i32> %r = fadd <2 x double> %a, %b @@ -34,6 +34,13 @@ define <2 x double> @haddpd2(<2 x double> %x, <2 x double> %y) { ; AVX: # %bb.0: ; AVX-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <2 x double> @haddpd2( +; CHECK-SAME: <2 x double> [[X:%.*]], <2 x double> [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[A:%.*]] = shufflevector <2 x double> [[X]], <2 x double> [[Y]], <2 x i32> +; CHECK-NEXT: [[B:%.*]] = shufflevector <2 x double> [[X]], <2 x double> [[Y]], <2 x i32> +; CHECK-NEXT: [[R:%.*]] = fadd <2 x double> [[A]], [[B]] +; CHECK-NEXT: ret <2 x double> [[R]] +; %a = shufflevector <2 x double> %x, <2 x double> %y, <2 x i32> %b = shufflevector <2 x double> %y, <2 x double> %x, <2 x i32> %r = fadd <2 x double> %a, %b @@ -63,6 +70,13 @@ define <2 x double> @haddpd3(<2 x double> %x) { ; AVX-FAST: # %bb.0: ; AVX-FAST-NEXT: vhaddpd %xmm0, %xmm0, %xmm0 ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define <2 x double> @haddpd3( +; CHECK-SAME: <2 x double> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[A:%.*]] = shufflevector <2 x double> [[X]], <2 x double> poison, <2 x i32> +; CHECK-NEXT: [[B:%.*]] = shufflevector <2 x double> [[X]], <2 x double> poison, <2 x i32> +; CHECK-NEXT: [[R:%.*]] = fadd <2 x double> [[A]], [[B]] +; CHECK-NEXT: ret <2 x double> [[R]] +; %a = shufflevector <2 x double> %x, <2 x double> undef, <2 x i32> %b = shufflevector <2 x double> %x, <2 x double> undef, <2 x i32> %r = fadd <2 x double> %a, %b @@ -79,6 +93,13 @@ define <4 x float> @haddps1(<4 x float> %x, <4 x float> %y) { ; AVX: # %bb.0: ; AVX-NEXT: vhaddps %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <4 x float> @haddps1( +; CHECK-SAME: <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[A:%.*]] = shufflevector <4 x float> [[X]], <4 x float> [[Y]], <4 x i32> +; CHECK-NEXT: [[B:%.*]] = shufflevector <4 x float> [[X]], <4 x float> [[Y]], <4 x i32> +; CHECK-NEXT: [[R:%.*]] = fadd <4 x float> [[A]], [[B]] +; CHECK-NEXT: ret <4 x float> [[R]] +; %a = shufflevector <4 x float> %x, <4 x float> %y, <4 x i32> %b = shufflevector <4 x float> %x, <4 x float> %y, <4 x i32> %r = fadd <4 x float> %a, %b @@ -95,6 +116,13 @@ define <4 x float> @haddps2(<4 x float> %x, <4 x float> %y) { ; AVX: # %bb.0: ; AVX-NEXT: vhaddps %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <4 x float> @haddps2( +; CHECK-SAME: <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[A:%.*]] = shufflevector <4 x float> [[X]], <4 x float> [[Y]], <4 x i32> +; CHECK-NEXT: [[B:%.*]] = shufflevector <4 x float> [[Y]], <4 x float> [[X]], <4 x i32> +; CHECK-NEXT: [[R:%.*]] = fadd <4 x float> [[A]], [[B]] +; CHECK-NEXT: ret <4 x float> [[R]] +; %a = shufflevector <4 x float> %x, <4 x float> %y, <4 x i32> %b = shufflevector <4 x float> %y, <4 x float> %x, <4 x i32> %r = fadd <4 x float> %a, %b @@ -111,6 +139,13 @@ define <4 x float> @haddps3(<4 x float> %x) { ; AVX: # %bb.0: ; AVX-NEXT: vhaddps %xmm0, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <4 x float> @haddps3( +; CHECK-SAME: <4 x float> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[A:%.*]] = shufflevector <4 x float> [[X]], <4 x float> , <4 x i32> +; CHECK-NEXT: [[B:%.*]] = shufflevector <4 x float> [[X]], <4 x float> , <4 x i32> +; CHECK-NEXT: [[R:%.*]] = fadd <4 x float> [[A]], [[B]] +; CHECK-NEXT: ret <4 x float> [[R]] +; %a = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> %b = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> %r = fadd <4 x float> %a, %b @@ -127,6 +162,13 @@ define <4 x float> @haddps4(<4 x float> %x) { ; AVX: # %bb.0: ; AVX-NEXT: vhaddps %xmm0, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <4 x float> @haddps4( +; CHECK-SAME: <4 x float> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[A:%.*]] = shufflevector <4 x float> [[X]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[B:%.*]] = shufflevector <4 x float> [[X]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[R:%.*]] = fadd <4 x float> [[A]], [[B]] +; CHECK-NEXT: ret <4 x float> [[R]] +; %a = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> %b = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> %r = fadd <4 x float> %a, %b @@ -143,6 +185,13 @@ define <4 x float> @haddps5(<4 x float> %x) { ; AVX: # %bb.0: ; AVX-NEXT: vhaddps %xmm0, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <4 x float> @haddps5( +; CHECK-SAME: <4 x float> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[A:%.*]] = shufflevector <4 x float> [[X]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[B:%.*]] = shufflevector <4 x float> [[X]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[R:%.*]] = fadd <4 x float> [[A]], [[B]] +; CHECK-NEXT: ret <4 x float> [[R]] +; %a = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> %b = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> %r = fadd <4 x float> %a, %b @@ -171,6 +220,13 @@ define <4 x float> @haddps6(<4 x float> %x) { ; AVX-FAST: # %bb.0: ; AVX-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm0 ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define <4 x float> @haddps6( +; CHECK-SAME: <4 x float> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[A:%.*]] = shufflevector <4 x float> [[X]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[B:%.*]] = shufflevector <4 x float> [[X]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[R:%.*]] = fadd <4 x float> [[A]], [[B]] +; CHECK-NEXT: ret <4 x float> [[R]] +; %a = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> %b = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> %r = fadd <4 x float> %a, %b @@ -187,6 +243,13 @@ define <4 x float> @haddps7(<4 x float> %x) { ; AVX: # %bb.0: ; AVX-NEXT: vhaddps %xmm0, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <4 x float> @haddps7( +; CHECK-SAME: <4 x float> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[A:%.*]] = shufflevector <4 x float> [[X]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[B:%.*]] = shufflevector <4 x float> [[X]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[R:%.*]] = fadd <4 x float> [[A]], [[B]] +; CHECK-NEXT: ret <4 x float> [[R]] +; %a = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> %b = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> %r = fadd <4 x float> %a, %b @@ -203,6 +266,13 @@ define <2 x double> @hsubpd1(<2 x double> %x, <2 x double> %y) { ; AVX: # %bb.0: ; AVX-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <2 x double> @hsubpd1( +; CHECK-SAME: <2 x double> [[X:%.*]], <2 x double> [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[A:%.*]] = shufflevector <2 x double> [[X]], <2 x double> [[Y]], <2 x i32> +; CHECK-NEXT: [[B:%.*]] = shufflevector <2 x double> [[X]], <2 x double> [[Y]], <2 x i32> +; CHECK-NEXT: [[R:%.*]] = fsub <2 x double> [[A]], [[B]] +; CHECK-NEXT: ret <2 x double> [[R]] +; %a = shufflevector <2 x double> %x, <2 x double> %y, <2 x i32> %b = shufflevector <2 x double> %x, <2 x double> %y, <2 x i32> %r = fsub <2 x double> %a, %b @@ -232,6 +302,13 @@ define <2 x double> @hsubpd2(<2 x double> %x) { ; AVX-FAST: # %bb.0: ; AVX-FAST-NEXT: vhsubpd %xmm0, %xmm0, %xmm0 ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define <2 x double> @hsubpd2( +; CHECK-SAME: <2 x double> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[A:%.*]] = shufflevector <2 x double> [[X]], <2 x double> poison, <2 x i32> +; CHECK-NEXT: [[B:%.*]] = shufflevector <2 x double> [[X]], <2 x double> poison, <2 x i32> +; CHECK-NEXT: [[R:%.*]] = fsub <2 x double> [[A]], [[B]] +; CHECK-NEXT: ret <2 x double> [[R]] +; %a = shufflevector <2 x double> %x, <2 x double> undef, <2 x i32> %b = shufflevector <2 x double> %x, <2 x double> undef, <2 x i32> %r = fsub <2 x double> %a, %b @@ -248,6 +325,13 @@ define <4 x float> @hsubps1(<4 x float> %x, <4 x float> %y) { ; AVX: # %bb.0: ; AVX-NEXT: vhsubps %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <4 x float> @hsubps1( +; CHECK-SAME: <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[A:%.*]] = shufflevector <4 x float> [[X]], <4 x float> [[Y]], <4 x i32> +; CHECK-NEXT: [[B:%.*]] = shufflevector <4 x float> [[X]], <4 x float> [[Y]], <4 x i32> +; CHECK-NEXT: [[R:%.*]] = fsub <4 x float> [[A]], [[B]] +; CHECK-NEXT: ret <4 x float> [[R]] +; %a = shufflevector <4 x float> %x, <4 x float> %y, <4 x i32> %b = shufflevector <4 x float> %x, <4 x float> %y, <4 x i32> %r = fsub <4 x float> %a, %b @@ -264,6 +348,13 @@ define <4 x float> @hsubps2(<4 x float> %x) { ; AVX: # %bb.0: ; AVX-NEXT: vhsubps %xmm0, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <4 x float> @hsubps2( +; CHECK-SAME: <4 x float> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[A:%.*]] = shufflevector <4 x float> [[X]], <4 x float> , <4 x i32> +; CHECK-NEXT: [[B:%.*]] = shufflevector <4 x float> [[X]], <4 x float> , <4 x i32> +; CHECK-NEXT: [[R:%.*]] = fsub <4 x float> [[A]], [[B]] +; CHECK-NEXT: ret <4 x float> [[R]] +; %a = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> %b = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> %r = fsub <4 x float> %a, %b @@ -280,6 +371,13 @@ define <4 x float> @hsubps3(<4 x float> %x) { ; AVX: # %bb.0: ; AVX-NEXT: vhsubps %xmm0, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <4 x float> @hsubps3( +; CHECK-SAME: <4 x float> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[A:%.*]] = shufflevector <4 x float> [[X]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[B:%.*]] = shufflevector <4 x float> [[X]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[R:%.*]] = fsub <4 x float> [[A]], [[B]] +; CHECK-NEXT: ret <4 x float> [[R]] +; %a = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> %b = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> %r = fsub <4 x float> %a, %b @@ -308,6 +406,13 @@ define <4 x float> @hsubps4(<4 x float> %x) { ; AVX-FAST: # %bb.0: ; AVX-FAST-NEXT: vhsubps %xmm0, %xmm0, %xmm0 ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define <4 x float> @hsubps4( +; CHECK-SAME: <4 x float> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[A:%.*]] = shufflevector <4 x float> [[X]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[B:%.*]] = shufflevector <4 x float> [[X]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[R:%.*]] = fsub <4 x float> [[A]], [[B]] +; CHECK-NEXT: ret <4 x float> [[R]] +; %a = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> %b = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> %r = fsub <4 x float> %a, %b @@ -325,6 +430,13 @@ define <8 x float> @vhaddps1(<8 x float> %x, <8 x float> %y) { ; AVX: # %bb.0: ; AVX-NEXT: vhaddps %ymm1, %ymm0, %ymm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <8 x float> @vhaddps1( +; CHECK-SAME: <8 x float> [[X:%.*]], <8 x float> [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[A:%.*]] = shufflevector <8 x float> [[X]], <8 x float> [[Y]], <8 x i32> +; CHECK-NEXT: [[B:%.*]] = shufflevector <8 x float> [[X]], <8 x float> [[Y]], <8 x i32> +; CHECK-NEXT: [[R:%.*]] = fadd <8 x float> [[A]], [[B]] +; CHECK-NEXT: ret <8 x float> [[R]] +; %a = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> %b = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> %r = fadd <8 x float> %a, %b @@ -342,6 +454,13 @@ define <8 x float> @vhaddps2(<8 x float> %x, <8 x float> %y) { ; AVX: # %bb.0: ; AVX-NEXT: vhaddps %ymm1, %ymm0, %ymm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <8 x float> @vhaddps2( +; CHECK-SAME: <8 x float> [[X:%.*]], <8 x float> [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[A:%.*]] = shufflevector <8 x float> [[X]], <8 x float> [[Y]], <8 x i32> +; CHECK-NEXT: [[B:%.*]] = shufflevector <8 x float> [[Y]], <8 x float> [[X]], <8 x i32> +; CHECK-NEXT: [[R:%.*]] = fadd <8 x float> [[A]], [[B]] +; CHECK-NEXT: ret <8 x float> [[R]] +; %a = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> %b = shufflevector <8 x float> %y, <8 x float> %x, <8 x i32> %r = fadd <8 x float> %a, %b @@ -359,6 +478,13 @@ define <8 x float> @vhaddps3(<8 x float> %x) { ; AVX: # %bb.0: ; AVX-NEXT: vhaddps %ymm0, %ymm0, %ymm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <8 x float> @vhaddps3( +; CHECK-SAME: <8 x float> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[A:%.*]] = shufflevector <8 x float> [[X]], <8 x float> , <8 x i32> +; CHECK-NEXT: [[B:%.*]] = shufflevector <8 x float> [[X]], <8 x float> , <8 x i32> +; CHECK-NEXT: [[R:%.*]] = fadd <8 x float> [[A]], [[B]] +; CHECK-NEXT: ret <8 x float> [[R]] +; %a = shufflevector <8 x float> %x, <8 x float> undef, <8 x i32> %b = shufflevector <8 x float> %x, <8 x float> undef, <8 x i32> %r = fadd <8 x float> %a, %b @@ -376,6 +502,13 @@ define <8 x float> @vhsubps1(<8 x float> %x, <8 x float> %y) { ; AVX: # %bb.0: ; AVX-NEXT: vhsubps %ymm1, %ymm0, %ymm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <8 x float> @vhsubps1( +; CHECK-SAME: <8 x float> [[X:%.*]], <8 x float> [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[A:%.*]] = shufflevector <8 x float> [[X]], <8 x float> [[Y]], <8 x i32> +; CHECK-NEXT: [[B:%.*]] = shufflevector <8 x float> [[X]], <8 x float> [[Y]], <8 x i32> +; CHECK-NEXT: [[R:%.*]] = fsub <8 x float> [[A]], [[B]] +; CHECK-NEXT: ret <8 x float> [[R]] +; %a = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> %b = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> %r = fsub <8 x float> %a, %b @@ -393,6 +526,13 @@ define <8 x float> @vhsubps3(<8 x float> %x) { ; AVX: # %bb.0: ; AVX-NEXT: vhsubps %ymm0, %ymm0, %ymm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <8 x float> @vhsubps3( +; CHECK-SAME: <8 x float> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[A:%.*]] = shufflevector <8 x float> [[X]], <8 x float> , <8 x i32> +; CHECK-NEXT: [[B:%.*]] = shufflevector <8 x float> [[X]], <8 x float> , <8 x i32> +; CHECK-NEXT: [[R:%.*]] = fsub <8 x float> [[A]], [[B]] +; CHECK-NEXT: ret <8 x float> [[R]] +; %a = shufflevector <8 x float> %x, <8 x float> undef, <8 x i32> %b = shufflevector <8 x float> %x, <8 x float> undef, <8 x i32> %r = fsub <8 x float> %a, %b @@ -410,6 +550,13 @@ define <4 x double> @vhaddpd1(<4 x double> %x, <4 x double> %y) { ; AVX: # %bb.0: ; AVX-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <4 x double> @vhaddpd1( +; CHECK-SAME: <4 x double> [[X:%.*]], <4 x double> [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[A:%.*]] = shufflevector <4 x double> [[X]], <4 x double> [[Y]], <4 x i32> +; CHECK-NEXT: [[B:%.*]] = shufflevector <4 x double> [[X]], <4 x double> [[Y]], <4 x i32> +; CHECK-NEXT: [[R:%.*]] = fadd <4 x double> [[A]], [[B]] +; CHECK-NEXT: ret <4 x double> [[R]] +; %a = shufflevector <4 x double> %x, <4 x double> %y, <4 x i32> %b = shufflevector <4 x double> %x, <4 x double> %y, <4 x i32> %r = fadd <4 x double> %a, %b @@ -427,6 +574,13 @@ define <4 x double> @vhsubpd1(<4 x double> %x, <4 x double> %y) { ; AVX: # %bb.0: ; AVX-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <4 x double> @vhsubpd1( +; CHECK-SAME: <4 x double> [[X:%.*]], <4 x double> [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[A:%.*]] = shufflevector <4 x double> [[X]], <4 x double> [[Y]], <4 x i32> +; CHECK-NEXT: [[B:%.*]] = shufflevector <4 x double> [[X]], <4 x double> [[Y]], <4 x i32> +; CHECK-NEXT: [[R:%.*]] = fsub <4 x double> [[A]], [[B]] +; CHECK-NEXT: ret <4 x double> [[R]] +; %a = shufflevector <4 x double> %x, <4 x double> %y, <4 x i32> %b = shufflevector <4 x double> %x, <4 x double> %y, <4 x i32> %r = fsub <4 x double> %a, %b @@ -443,6 +597,13 @@ define <2 x float> @haddps_v2f32(<4 x float> %v0) { ; AVX: # %bb.0: ; AVX-NEXT: vhaddps %xmm0, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <2 x float> @haddps_v2f32( +; CHECK-SAME: <4 x float> [[V0:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[V0]], <4 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[V0]], <4 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = fadd <2 x float> [[TMP4]], [[TMP5]] +; CHECK-NEXT: ret <2 x float> [[TMP6]] +; %v0.0 = extractelement <4 x float> %v0, i32 0 %v0.1 = extractelement <4 x float> %v0, i32 1 %v0.2 = extractelement <4 x float> %v0, i32 2 @@ -478,6 +639,13 @@ define float @extract_extract01_v4f32_fadd_f32(<4 x float> %x) { ; AVX-FAST: # %bb.0: ; AVX-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm0 ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define float @extract_extract01_v4f32_fadd_f32( +; CHECK-SAME: <4 x float> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[X]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[X]], [[SHIFT]] +; CHECK-NEXT: [[X01:%.*]] = extractelement <4 x float> [[TMP1]], i64 0 +; CHECK-NEXT: ret float [[X01]] +; %x0 = extractelement <4 x float> %x, i32 0 %x1 = extractelement <4 x float> %x, i32 1 %x01 = fadd float %x0, %x1 @@ -511,6 +679,13 @@ define float @extract_extract23_v4f32_fadd_f32(<4 x float> %x) { ; AVX-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm0 ; AVX-FAST-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define float @extract_extract23_v4f32_fadd_f32( +; CHECK-SAME: <4 x float> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[X]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[X]], [[SHIFT]] +; CHECK-NEXT: [[X01:%.*]] = extractelement <4 x float> [[TMP1]], i64 2 +; CHECK-NEXT: ret float [[X01]] +; %x0 = extractelement <4 x float> %x, i32 2 %x1 = extractelement <4 x float> %x, i32 3 %x01 = fadd float %x0, %x1 @@ -539,6 +714,13 @@ define float @extract_extract01_v4f32_fadd_f32_commute(<4 x float> %x) { ; AVX-FAST: # %bb.0: ; AVX-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm0 ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define float @extract_extract01_v4f32_fadd_f32_commute( +; CHECK-SAME: <4 x float> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[X]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[SHIFT]], [[X]] +; CHECK-NEXT: [[X01:%.*]] = extractelement <4 x float> [[TMP1]], i64 0 +; CHECK-NEXT: ret float [[X01]] +; %x0 = extractelement <4 x float> %x, i32 0 %x1 = extractelement <4 x float> %x, i32 1 %x01 = fadd float %x1, %x0 @@ -572,6 +754,13 @@ define float @extract_extract23_v4f32_fadd_f32_commute(<4 x float> %x) { ; AVX-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm0 ; AVX-FAST-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define float @extract_extract23_v4f32_fadd_f32_commute( +; CHECK-SAME: <4 x float> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[X]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[SHIFT]], [[X]] +; CHECK-NEXT: [[X01:%.*]] = extractelement <4 x float> [[TMP1]], i64 2 +; CHECK-NEXT: ret float [[X01]] +; %x0 = extractelement <4 x float> %x, i32 2 %x1 = extractelement <4 x float> %x, i32 3 %x01 = fadd float %x1, %x0 @@ -601,6 +790,13 @@ define double @extract_extract01_v2f64_fadd_f64(<2 x double> %x) { ; AVX-FAST: # %bb.0: ; AVX-FAST-NEXT: vhaddpd %xmm0, %xmm0, %xmm0 ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define double @extract_extract01_v2f64_fadd_f64( +; CHECK-SAME: <2 x double> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <2 x double> [[X]], <2 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fadd <2 x double> [[X]], [[SHIFT]] +; CHECK-NEXT: [[X01:%.*]] = extractelement <2 x double> [[TMP1]], i64 0 +; CHECK-NEXT: ret double [[X01]] +; %x0 = extractelement <2 x double> %x, i32 0 %x1 = extractelement <2 x double> %x, i32 1 %x01 = fadd double %x0, %x1 @@ -630,6 +826,13 @@ define double @extract_extract01_v2f64_fadd_f64_commute(<2 x double> %x) { ; AVX-FAST: # %bb.0: ; AVX-FAST-NEXT: vhaddpd %xmm0, %xmm0, %xmm0 ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define double @extract_extract01_v2f64_fadd_f64_commute( +; CHECK-SAME: <2 x double> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <2 x double> [[X]], <2 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fadd <2 x double> [[SHIFT]], [[X]] +; CHECK-NEXT: [[X01:%.*]] = extractelement <2 x double> [[TMP1]], i64 0 +; CHECK-NEXT: ret double [[X01]] +; %x0 = extractelement <2 x double> %x, i32 0 %x1 = extractelement <2 x double> %x, i32 1 %x01 = fadd double %x1, %x0 @@ -658,6 +861,13 @@ define float @extract_extract01_v4f32_fsub_f32(<4 x float> %x) { ; AVX-FAST: # %bb.0: ; AVX-FAST-NEXT: vhsubps %xmm0, %xmm0, %xmm0 ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define float @extract_extract01_v4f32_fsub_f32( +; CHECK-SAME: <4 x float> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[X]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fsub <4 x float> [[X]], [[SHIFT]] +; CHECK-NEXT: [[X01:%.*]] = extractelement <4 x float> [[TMP1]], i64 0 +; CHECK-NEXT: ret float [[X01]] +; %x0 = extractelement <4 x float> %x, i32 0 %x1 = extractelement <4 x float> %x, i32 1 %x01 = fsub float %x0, %x1 @@ -692,6 +902,13 @@ define float @extract_extract23_v4f32_fsub_f32(<4 x float> %x) { ; AVX-FAST-NEXT: vhsubps %xmm0, %xmm0, %xmm0 ; AVX-FAST-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define float @extract_extract23_v4f32_fsub_f32( +; CHECK-SAME: <4 x float> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[X]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fsub <4 x float> [[X]], [[SHIFT]] +; CHECK-NEXT: [[X01:%.*]] = extractelement <4 x float> [[TMP1]], i64 2 +; CHECK-NEXT: ret float [[X01]] +; %x0 = extractelement <4 x float> %x, i32 2 %x1 = extractelement <4 x float> %x, i32 3 %x01 = fsub float %x0, %x1 @@ -711,6 +928,13 @@ define float @extract_extract01_v4f32_fsub_f32_commute(<4 x float> %x) { ; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] ; AVX-NEXT: vsubss %xmm0, %xmm1, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define float @extract_extract01_v4f32_fsub_f32_commute( +; CHECK-SAME: <4 x float> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[X]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fsub <4 x float> [[SHIFT]], [[X]] +; CHECK-NEXT: [[X01:%.*]] = extractelement <4 x float> [[TMP1]], i64 0 +; CHECK-NEXT: ret float [[X01]] +; %x0 = extractelement <4 x float> %x, i32 0 %x1 = extractelement <4 x float> %x, i32 1 %x01 = fsub float %x1, %x0 @@ -732,6 +956,13 @@ define float @extract_extract23_v4f32_fsub_f32_commute(<4 x float> %x) { ; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3] ; AVX-NEXT: vsubss %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define float @extract_extract23_v4f32_fsub_f32_commute( +; CHECK-SAME: <4 x float> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[X]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fsub <4 x float> [[SHIFT]], [[X]] +; CHECK-NEXT: [[X01:%.*]] = extractelement <4 x float> [[TMP1]], i64 2 +; CHECK-NEXT: ret float [[X01]] +; %x0 = extractelement <4 x float> %x, i32 2 %x1 = extractelement <4 x float> %x, i32 3 %x01 = fsub float %x1, %x0 @@ -761,6 +992,13 @@ define double @extract_extract01_v2f64_fsub_f64(<2 x double> %x) { ; AVX-FAST: # %bb.0: ; AVX-FAST-NEXT: vhsubpd %xmm0, %xmm0, %xmm0 ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define double @extract_extract01_v2f64_fsub_f64( +; CHECK-SAME: <2 x double> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <2 x double> [[X]], <2 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fsub <2 x double> [[X]], [[SHIFT]] +; CHECK-NEXT: [[X01:%.*]] = extractelement <2 x double> [[TMP1]], i64 0 +; CHECK-NEXT: ret double [[X01]] +; %x0 = extractelement <2 x double> %x, i32 0 %x1 = extractelement <2 x double> %x, i32 1 %x01 = fsub double %x0, %x1 @@ -781,6 +1019,13 @@ define double @extract_extract01_v2f64_fsub_f64_commute(<2 x double> %x) { ; AVX-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0] ; AVX-NEXT: vsubsd %xmm0, %xmm1, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define double @extract_extract01_v2f64_fsub_f64_commute( +; CHECK-SAME: <2 x double> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <2 x double> [[X]], <2 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fsub <2 x double> [[SHIFT]], [[X]] +; CHECK-NEXT: [[X01:%.*]] = extractelement <2 x double> [[TMP1]], i64 0 +; CHECK-NEXT: ret double [[X01]] +; %x0 = extractelement <2 x double> %x, i32 0 %x1 = extractelement <2 x double> %x, i32 1 %x01 = fsub double %x1, %x0 @@ -813,6 +1058,13 @@ define float @extract_extract01_v8f32_fadd_f32(<8 x float> %x) { ; AVX-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm0 ; AVX-FAST-NEXT: vzeroupper ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define float @extract_extract01_v8f32_fadd_f32( +; CHECK-SAME: <8 x float> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <8 x float> [[X]], <8 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fadd <8 x float> [[X]], [[SHIFT]] +; CHECK-NEXT: [[X01:%.*]] = extractelement <8 x float> [[TMP1]], i64 0 +; CHECK-NEXT: ret float [[X01]] +; %x0 = extractelement <8 x float> %x, i32 0 %x1 = extractelement <8 x float> %x, i32 1 %x01 = fadd float %x0, %x1 @@ -848,6 +1100,13 @@ define float @extract_extract23_v8f32_fadd_f32(<8 x float> %x) { ; AVX-FAST-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] ; AVX-FAST-NEXT: vzeroupper ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define float @extract_extract23_v8f32_fadd_f32( +; CHECK-SAME: <8 x float> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <8 x float> [[X]], <8 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fadd <8 x float> [[X]], [[SHIFT]] +; CHECK-NEXT: [[X01:%.*]] = extractelement <8 x float> [[TMP1]], i64 2 +; CHECK-NEXT: ret float [[X01]] +; %x0 = extractelement <8 x float> %x, i32 2 %x1 = extractelement <8 x float> %x, i32 3 %x01 = fadd float %x0, %x1 @@ -885,6 +1144,13 @@ define float @extract_extract67_v8f32_fadd_f32(<8 x float> %x) { ; AVX-FAST-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] ; AVX-FAST-NEXT: vzeroupper ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define float @extract_extract67_v8f32_fadd_f32( +; CHECK-SAME: <8 x float> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <8 x float> [[X]], <8 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fadd <8 x float> [[X]], [[SHIFT]] +; CHECK-NEXT: [[X01:%.*]] = extractelement <8 x float> [[TMP1]], i64 6 +; CHECK-NEXT: ret float [[X01]] +; %x0 = extractelement <8 x float> %x, i32 6 %x1 = extractelement <8 x float> %x, i32 7 %x01 = fadd float %x0, %x1 @@ -915,6 +1181,13 @@ define float @extract_extract01_v8f32_fadd_f32_commute(<8 x float> %x) { ; AVX-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm0 ; AVX-FAST-NEXT: vzeroupper ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define float @extract_extract01_v8f32_fadd_f32_commute( +; CHECK-SAME: <8 x float> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <8 x float> [[X]], <8 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fadd <8 x float> [[SHIFT]], [[X]] +; CHECK-NEXT: [[X01:%.*]] = extractelement <8 x float> [[TMP1]], i64 0 +; CHECK-NEXT: ret float [[X01]] +; %x0 = extractelement <8 x float> %x, i32 0 %x1 = extractelement <8 x float> %x, i32 1 %x01 = fadd float %x1, %x0 @@ -950,6 +1223,13 @@ define float @extract_extract23_v8f32_fadd_f32_commute(<8 x float> %x) { ; AVX-FAST-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] ; AVX-FAST-NEXT: vzeroupper ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define float @extract_extract23_v8f32_fadd_f32_commute( +; CHECK-SAME: <8 x float> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <8 x float> [[X]], <8 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fadd <8 x float> [[SHIFT]], [[X]] +; CHECK-NEXT: [[X01:%.*]] = extractelement <8 x float> [[TMP1]], i64 2 +; CHECK-NEXT: ret float [[X01]] +; %x0 = extractelement <8 x float> %x, i32 2 %x1 = extractelement <8 x float> %x, i32 3 %x01 = fadd float %x1, %x0 @@ -987,6 +1267,13 @@ define float @extract_extract67_v8f32_fadd_f32_commute(<8 x float> %x) { ; AVX-FAST-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] ; AVX-FAST-NEXT: vzeroupper ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define float @extract_extract67_v8f32_fadd_f32_commute( +; CHECK-SAME: <8 x float> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <8 x float> [[X]], <8 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fadd <8 x float> [[SHIFT]], [[X]] +; CHECK-NEXT: [[X01:%.*]] = extractelement <8 x float> [[TMP1]], i64 6 +; CHECK-NEXT: ret float [[X01]] +; %x0 = extractelement <8 x float> %x, i32 6 %x1 = extractelement <8 x float> %x, i32 7 %x01 = fadd float %x1, %x0 @@ -1018,6 +1305,13 @@ define double @extract_extract01_v4f64_fadd_f64(<4 x double> %x) { ; AVX-FAST-NEXT: vhaddpd %xmm0, %xmm0, %xmm0 ; AVX-FAST-NEXT: vzeroupper ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define double @extract_extract01_v4f64_fadd_f64( +; CHECK-SAME: <4 x double> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x double> [[X]], <4 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x double> [[X]], [[SHIFT]] +; CHECK-NEXT: [[X01:%.*]] = extractelement <4 x double> [[TMP1]], i64 0 +; CHECK-NEXT: ret double [[X01]] +; %x0 = extractelement <4 x double> %x, i32 0 %x1 = extractelement <4 x double> %x, i32 1 %x01 = fadd double %x0, %x1 @@ -1052,6 +1346,13 @@ define double @extract_extract23_v4f64_fadd_f64(<4 x double> %x) { ; AVX-FAST-NEXT: vhaddpd %xmm0, %xmm0, %xmm0 ; AVX-FAST-NEXT: vzeroupper ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define double @extract_extract23_v4f64_fadd_f64( +; CHECK-SAME: <4 x double> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x double> [[X]], <4 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x double> [[X]], [[SHIFT]] +; CHECK-NEXT: [[X01:%.*]] = extractelement <4 x double> [[TMP1]], i64 2 +; CHECK-NEXT: ret double [[X01]] +; %x0 = extractelement <4 x double> %x, i32 2 %x1 = extractelement <4 x double> %x, i32 3 %x01 = fadd double %x0, %x1 @@ -1083,6 +1384,13 @@ define double @extract_extract01_v4f64_fadd_f64_commute(<4 x double> %x) { ; AVX-FAST-NEXT: vhaddpd %xmm0, %xmm0, %xmm0 ; AVX-FAST-NEXT: vzeroupper ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define double @extract_extract01_v4f64_fadd_f64_commute( +; CHECK-SAME: <4 x double> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x double> [[X]], <4 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x double> [[SHIFT]], [[X]] +; CHECK-NEXT: [[X01:%.*]] = extractelement <4 x double> [[TMP1]], i64 0 +; CHECK-NEXT: ret double [[X01]] +; %x0 = extractelement <4 x double> %x, i32 0 %x1 = extractelement <4 x double> %x, i32 1 %x01 = fadd double %x1, %x0 @@ -1117,6 +1425,13 @@ define double @extract_extract23_v4f64_fadd_f64_commute(<4 x double> %x) { ; AVX-FAST-NEXT: vhaddpd %xmm0, %xmm0, %xmm0 ; AVX-FAST-NEXT: vzeroupper ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define double @extract_extract23_v4f64_fadd_f64_commute( +; CHECK-SAME: <4 x double> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x double> [[X]], <4 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x double> [[SHIFT]], [[X]] +; CHECK-NEXT: [[X01:%.*]] = extractelement <4 x double> [[TMP1]], i64 2 +; CHECK-NEXT: ret double [[X01]] +; %x0 = extractelement <4 x double> %x, i32 2 %x1 = extractelement <4 x double> %x, i32 3 %x01 = fadd double %x1, %x0 @@ -1147,6 +1462,13 @@ define float @extract_extract01_v8f32_fsub_f32(<8 x float> %x) { ; AVX-FAST-NEXT: vhsubps %xmm0, %xmm0, %xmm0 ; AVX-FAST-NEXT: vzeroupper ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define float @extract_extract01_v8f32_fsub_f32( +; CHECK-SAME: <8 x float> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <8 x float> [[X]], <8 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fsub <8 x float> [[X]], [[SHIFT]] +; CHECK-NEXT: [[X01:%.*]] = extractelement <8 x float> [[TMP1]], i64 0 +; CHECK-NEXT: ret float [[X01]] +; %x0 = extractelement <8 x float> %x, i32 0 %x1 = extractelement <8 x float> %x, i32 1 %x01 = fsub float %x0, %x1 @@ -1183,6 +1505,13 @@ define float @extract_extract23_v8f32_fsub_f32(<8 x float> %x) { ; AVX-FAST-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] ; AVX-FAST-NEXT: vzeroupper ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define float @extract_extract23_v8f32_fsub_f32( +; CHECK-SAME: <8 x float> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <8 x float> [[X]], <8 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fsub <8 x float> [[X]], [[SHIFT]] +; CHECK-NEXT: [[X01:%.*]] = extractelement <8 x float> [[TMP1]], i64 2 +; CHECK-NEXT: ret float [[X01]] +; %x0 = extractelement <8 x float> %x, i32 2 %x1 = extractelement <8 x float> %x, i32 3 %x01 = fsub float %x0, %x1 @@ -1217,6 +1546,13 @@ define float @extract_extract45_v8f32_fsub_f32(<8 x float> %x) { ; AVX-FAST-NEXT: vhsubps %xmm0, %xmm0, %xmm0 ; AVX-FAST-NEXT: vzeroupper ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define float @extract_extract45_v8f32_fsub_f32( +; CHECK-SAME: <8 x float> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <8 x float> [[X]], <8 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fsub <8 x float> [[X]], [[SHIFT]] +; CHECK-NEXT: [[X01:%.*]] = extractelement <8 x float> [[TMP1]], i64 4 +; CHECK-NEXT: ret float [[X01]] +; %x0 = extractelement <8 x float> %x, i32 4 %x1 = extractelement <8 x float> %x, i32 5 %x01 = fsub float %x0, %x1 @@ -1239,6 +1575,13 @@ define float @extract_extract01_v8f32_fsub_f32_commute(<8 x float> %x) { ; AVX-NEXT: vsubss %xmm0, %xmm1, %xmm0 ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq +; CHECK-LABEL: define float @extract_extract01_v8f32_fsub_f32_commute( +; CHECK-SAME: <8 x float> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <8 x float> [[X]], <8 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fsub <8 x float> [[SHIFT]], [[X]] +; CHECK-NEXT: [[X01:%.*]] = extractelement <8 x float> [[TMP1]], i64 0 +; CHECK-NEXT: ret float [[X01]] +; %x0 = extractelement <8 x float> %x, i32 0 %x1 = extractelement <8 x float> %x, i32 1 %x01 = fsub float %x1, %x0 @@ -1270,6 +1613,13 @@ define double @extract_extract01_v4f64_fsub_f64(<4 x double> %x) { ; AVX-FAST-NEXT: vhsubpd %xmm0, %xmm0, %xmm0 ; AVX-FAST-NEXT: vzeroupper ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define double @extract_extract01_v4f64_fsub_f64( +; CHECK-SAME: <4 x double> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x double> [[X]], <4 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fsub <4 x double> [[X]], [[SHIFT]] +; CHECK-NEXT: [[X01:%.*]] = extractelement <4 x double> [[TMP1]], i64 0 +; CHECK-NEXT: ret double [[X01]] +; %x0 = extractelement <4 x double> %x, i32 0 %x1 = extractelement <4 x double> %x, i32 1 %x01 = fsub double %x0, %x1 @@ -1293,6 +1643,13 @@ define double @extract_extract01_v4f64_fsub_f64_commute(<4 x double> %x) { ; AVX-NEXT: vsubsd %xmm0, %xmm1, %xmm0 ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq +; CHECK-LABEL: define double @extract_extract01_v4f64_fsub_f64_commute( +; CHECK-SAME: <4 x double> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x double> [[X]], <4 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fsub <4 x double> [[SHIFT]], [[X]] +; CHECK-NEXT: [[X01:%.*]] = extractelement <4 x double> [[TMP1]], i64 0 +; CHECK-NEXT: ret double [[X01]] +; %x0 = extractelement <4 x double> %x, i32 0 %x1 = extractelement <4 x double> %x, i32 1 %x01 = fsub double %x1, %x0 @@ -1325,6 +1682,13 @@ define float @extract_extract01_v16f32_fadd_f32(<16 x float> %x) { ; AVX-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm0 ; AVX-FAST-NEXT: vzeroupper ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define float @extract_extract01_v16f32_fadd_f32( +; CHECK-SAME: <16 x float> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <16 x float> [[X]], <16 x float> poison, <16 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fadd <16 x float> [[X]], [[SHIFT]] +; CHECK-NEXT: [[X01:%.*]] = extractelement <16 x float> [[TMP1]], i64 0 +; CHECK-NEXT: ret float [[X01]] +; %x0 = extractelement <16 x float> %x, i32 0 %x1 = extractelement <16 x float> %x, i32 1 %x01 = fadd float %x0, %x1 @@ -1355,6 +1719,13 @@ define float @extract_extract01_v16f32_fadd_f32_commute(<16 x float> %x) { ; AVX-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm0 ; AVX-FAST-NEXT: vzeroupper ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define float @extract_extract01_v16f32_fadd_f32_commute( +; CHECK-SAME: <16 x float> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <16 x float> [[X]], <16 x float> poison, <16 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fadd <16 x float> [[SHIFT]], [[X]] +; CHECK-NEXT: [[X01:%.*]] = extractelement <16 x float> [[TMP1]], i64 0 +; CHECK-NEXT: ret float [[X01]] +; %x0 = extractelement <16 x float> %x, i32 0 %x1 = extractelement <16 x float> %x, i32 1 %x01 = fadd float %x1, %x0 @@ -1386,6 +1757,13 @@ define double @extract_extract01_v8f64_fadd_f64(<8 x double> %x) { ; AVX-FAST-NEXT: vhaddpd %xmm0, %xmm0, %xmm0 ; AVX-FAST-NEXT: vzeroupper ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define double @extract_extract01_v8f64_fadd_f64( +; CHECK-SAME: <8 x double> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <8 x double> [[X]], <8 x double> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fadd <8 x double> [[X]], [[SHIFT]] +; CHECK-NEXT: [[X01:%.*]] = extractelement <8 x double> [[TMP1]], i64 0 +; CHECK-NEXT: ret double [[X01]] +; %x0 = extractelement <8 x double> %x, i32 0 %x1 = extractelement <8 x double> %x, i32 1 %x01 = fadd double %x0, %x1 @@ -1417,6 +1795,13 @@ define double @extract_extract01_v8f64_fadd_f64_commute(<8 x double> %x) { ; AVX-FAST-NEXT: vhaddpd %xmm0, %xmm0, %xmm0 ; AVX-FAST-NEXT: vzeroupper ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define double @extract_extract01_v8f64_fadd_f64_commute( +; CHECK-SAME: <8 x double> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <8 x double> [[X]], <8 x double> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fadd <8 x double> [[SHIFT]], [[X]] +; CHECK-NEXT: [[X01:%.*]] = extractelement <8 x double> [[TMP1]], i64 0 +; CHECK-NEXT: ret double [[X01]] +; %x0 = extractelement <8 x double> %x, i32 0 %x1 = extractelement <8 x double> %x, i32 1 %x01 = fadd double %x1, %x0 @@ -1447,6 +1832,13 @@ define float @extract_extract01_v16f32_fsub_f32(<16 x float> %x) { ; AVX-FAST-NEXT: vhsubps %xmm0, %xmm0, %xmm0 ; AVX-FAST-NEXT: vzeroupper ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define float @extract_extract01_v16f32_fsub_f32( +; CHECK-SAME: <16 x float> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <16 x float> [[X]], <16 x float> poison, <16 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fsub <16 x float> [[X]], [[SHIFT]] +; CHECK-NEXT: [[X01:%.*]] = extractelement <16 x float> [[TMP1]], i64 0 +; CHECK-NEXT: ret float [[X01]] +; %x0 = extractelement <16 x float> %x, i32 0 %x1 = extractelement <16 x float> %x, i32 1 %x01 = fsub float %x0, %x1 @@ -1467,6 +1859,13 @@ define float @extract_extract01_v16f32_fsub_f32_commute(<16 x float> %x) { ; AVX-NEXT: vsubss %xmm0, %xmm1, %xmm0 ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq +; CHECK-LABEL: define float @extract_extract01_v16f32_fsub_f32_commute( +; CHECK-SAME: <16 x float> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <16 x float> [[X]], <16 x float> poison, <16 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fsub <16 x float> [[SHIFT]], [[X]] +; CHECK-NEXT: [[X01:%.*]] = extractelement <16 x float> [[TMP1]], i64 0 +; CHECK-NEXT: ret float [[X01]] +; %x0 = extractelement <16 x float> %x, i32 0 %x1 = extractelement <16 x float> %x, i32 1 %x01 = fsub float %x1, %x0 @@ -1498,6 +1897,13 @@ define double @extract_extract01_v8f64_fsub_f64(<8 x double> %x) { ; AVX-FAST-NEXT: vhsubpd %xmm0, %xmm0, %xmm0 ; AVX-FAST-NEXT: vzeroupper ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define double @extract_extract01_v8f64_fsub_f64( +; CHECK-SAME: <8 x double> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <8 x double> [[X]], <8 x double> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fsub <8 x double> [[X]], [[SHIFT]] +; CHECK-NEXT: [[X01:%.*]] = extractelement <8 x double> [[TMP1]], i64 0 +; CHECK-NEXT: ret double [[X01]] +; %x0 = extractelement <8 x double> %x, i32 0 %x1 = extractelement <8 x double> %x, i32 1 %x01 = fsub double %x0, %x1 @@ -1519,6 +1925,13 @@ define double @extract_extract01_v8f64_fsub_f64_commute(<8 x double> %x) { ; AVX-NEXT: vsubsd %xmm0, %xmm1, %xmm0 ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq +; CHECK-LABEL: define double @extract_extract01_v8f64_fsub_f64_commute( +; CHECK-SAME: <8 x double> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <8 x double> [[X]], <8 x double> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fsub <8 x double> [[SHIFT]], [[X]] +; CHECK-NEXT: [[X01:%.*]] = extractelement <8 x double> [[TMP1]], i64 0 +; CHECK-NEXT: ret double [[X01]] +; %x0 = extractelement <8 x double> %x, i32 0 %x1 = extractelement <8 x double> %x, i32 1 %x01 = fsub double %x1, %x0 @@ -1553,6 +1966,14 @@ define float @extract_extract01_v4f32_fadd_f32_uses1(<4 x float> %x, ptr %p) { ; AVX-FAST-NEXT: vmovss %xmm0, (%rdi) ; AVX-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm0 ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define float @extract_extract01_v4f32_fadd_f32_uses1( +; CHECK-SAME: <4 x float> [[X:%.*]], ptr writeonly captures(none) initializes((0, 4)) [[P:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] { +; CHECK-NEXT: [[X0:%.*]] = extractelement <4 x float> [[X]], i64 0 +; CHECK-NEXT: store float [[X0]], ptr [[P]], align 4 +; CHECK-NEXT: [[X1:%.*]] = extractelement <4 x float> [[X]], i64 1 +; CHECK-NEXT: [[X01:%.*]] = fadd float [[X0]], [[X1]] +; CHECK-NEXT: ret float [[X01]] +; %x0 = extractelement <4 x float> %x, i32 0 store float %x0, ptr %p %x1 = extractelement <4 x float> %x, i32 1 @@ -1587,6 +2008,14 @@ define float @extract_extract01_v4f32_fadd_f32_uses2(<4 x float> %x, ptr %p) { ; AVX-FAST-NEXT: vextractps $1, %xmm0, (%rdi) ; AVX-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm0 ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define float @extract_extract01_v4f32_fadd_f32_uses2( +; CHECK-SAME: <4 x float> [[X:%.*]], ptr writeonly captures(none) initializes((0, 4)) [[P:%.*]]) local_unnamed_addr #[[ATTR1]] { +; CHECK-NEXT: [[X0:%.*]] = extractelement <4 x float> [[X]], i64 0 +; CHECK-NEXT: [[X1:%.*]] = extractelement <4 x float> [[X]], i64 1 +; CHECK-NEXT: store float [[X1]], ptr [[P]], align 4 +; CHECK-NEXT: [[X01:%.*]] = fadd float [[X0]], [[X1]] +; CHECK-NEXT: ret float [[X01]] +; %x0 = extractelement <4 x float> %x, i32 0 %x1 = extractelement <4 x float> %x, i32 1 store float %x1, ptr %p @@ -1610,6 +2039,15 @@ define float @extract_extract01_v4f32_fadd_f32_uses3(<4 x float> %x, ptr %p1, pt ; AVX-NEXT: vmovss %xmm1, (%rsi) ; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define float @extract_extract01_v4f32_fadd_f32_uses3( +; CHECK-SAME: <4 x float> [[X:%.*]], ptr writeonly captures(none) initializes((0, 4)) [[P1:%.*]], ptr writeonly captures(none) initializes((0, 4)) [[P2:%.*]]) local_unnamed_addr #[[ATTR1]] { +; CHECK-NEXT: [[X0:%.*]] = extractelement <4 x float> [[X]], i64 0 +; CHECK-NEXT: store float [[X0]], ptr [[P1]], align 4 +; CHECK-NEXT: [[X1:%.*]] = extractelement <4 x float> [[X]], i64 1 +; CHECK-NEXT: store float [[X1]], ptr [[P2]], align 4 +; CHECK-NEXT: [[X01:%.*]] = fadd float [[X0]], [[X1]] +; CHECK-NEXT: ret float [[X01]] +; %x0 = extractelement <4 x float> %x, i32 0 store float %x0, ptr %p1 %x1 = extractelement <4 x float> %x, i32 1 @@ -1665,6 +2103,11 @@ define float @fadd_reduce_v8f32(float %a0, <8 x float> %a1) { ; AVX-FAST-NEXT: vaddss %xmm1, %xmm0, %xmm0 ; AVX-FAST-NEXT: vzeroupper ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define float @fadd_reduce_v8f32( +; CHECK-SAME: float [[A0:%.*]], <8 x float> [[A1:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[R:%.*]] = tail call fast float @llvm.vector.reduce.fadd.v8f32(float [[A0]], <8 x float> [[A1]]) +; CHECK-NEXT: ret float [[R]] +; %r = call fast float @llvm.vector.reduce.fadd.f32.v8f32(float %a0, <8 x float> %a1) ret float %r } @@ -1704,6 +2147,11 @@ define double @fadd_reduce_v4f64(double %a0, <4 x double> %a1) { ; AVX-FAST-NEXT: vaddsd %xmm1, %xmm0, %xmm0 ; AVX-FAST-NEXT: vzeroupper ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define double @fadd_reduce_v4f64( +; CHECK-SAME: double [[A0:%.*]], <4 x double> [[A1:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[R:%.*]] = tail call fast double @llvm.vector.reduce.fadd.v4f64(double [[A0]], <4 x double> [[A1]]) +; CHECK-NEXT: ret double [[R]] +; %r = call fast double @llvm.vector.reduce.fadd.f64.v4f64(double %a0, <4 x double> %a1) ret double %r } @@ -1760,6 +2208,19 @@ define float @PR39936_v8f32(<8 x float>) { ; AVX-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm0 ; AVX-FAST-NEXT: vzeroupper ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define float @PR39936_v8f32( +; CHECK-SAME: <8 x float> [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[TMP0]], <8 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[TMP0]], <8 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = fadd <8 x float> [[TMP2]], [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[TMP4]], <8 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <8 x float> [[TMP4]], <8 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = fadd <8 x float> [[TMP5]], [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x float> [[TMP7]], <8 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = fadd <8 x float> [[TMP7]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <8 x float> [[TMP9]], i64 0 +; CHECK-NEXT: ret float [[TMP10]] +; %2 = shufflevector <8 x float> %0, <8 x float> undef, <8 x i32> %3 = shufflevector <8 x float> %0, <8 x float> undef, <8 x i32> %4 = fadd <8 x float> %2, %3 @@ -1804,6 +2265,15 @@ define float @hadd32_4(<4 x float> %x225) { ; AVX-FAST-NEXT: vaddps %xmm1, %xmm0, %xmm0 ; AVX-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm0 ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define float @hadd32_4( +; CHECK-SAME: <4 x float> [[X225:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[X226:%.*]] = shufflevector <4 x float> [[X225]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[X227:%.*]] = fadd <4 x float> [[X225]], [[X226]] +; CHECK-NEXT: [[X228:%.*]] = shufflevector <4 x float> [[X227]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[X229:%.*]] = fadd <4 x float> [[X227]], [[X228]] +; CHECK-NEXT: [[X230:%.*]] = extractelement <4 x float> [[X229]], i64 0 +; CHECK-NEXT: ret float [[X230]] +; %x226 = shufflevector <4 x float> %x225, <4 x float> undef, <4 x i32> %x227 = fadd <4 x float> %x225, %x226 %x228 = shufflevector <4 x float> %x227, <4 x float> undef, <4 x i32> @@ -1846,6 +2316,15 @@ define float @hadd32_8(<8 x float> %x225) { ; AVX-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm0 ; AVX-FAST-NEXT: vzeroupper ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define float @hadd32_8( +; CHECK-SAME: <8 x float> [[X225:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[X226:%.*]] = shufflevector <8 x float> [[X225]], <8 x float> poison, <8 x i32> +; CHECK-NEXT: [[X227:%.*]] = fadd <8 x float> [[X225]], [[X226]] +; CHECK-NEXT: [[X228:%.*]] = shufflevector <8 x float> [[X227]], <8 x float> poison, <8 x i32> +; CHECK-NEXT: [[X229:%.*]] = fadd <8 x float> [[X227]], [[X228]] +; CHECK-NEXT: [[X230:%.*]] = extractelement <8 x float> [[X229]], i64 0 +; CHECK-NEXT: ret float [[X230]] +; %x226 = shufflevector <8 x float> %x225, <8 x float> undef, <8 x i32> %x227 = fadd <8 x float> %x225, %x226 %x228 = shufflevector <8 x float> %x227, <8 x float> undef, <8 x i32> @@ -1888,6 +2367,15 @@ define float @hadd32_16(<16 x float> %x225) { ; AVX-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm0 ; AVX-FAST-NEXT: vzeroupper ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define float @hadd32_16( +; CHECK-SAME: <16 x float> [[X225:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[X226:%.*]] = shufflevector <16 x float> [[X225]], <16 x float> poison, <16 x i32> +; CHECK-NEXT: [[X227:%.*]] = fadd <16 x float> [[X225]], [[X226]] +; CHECK-NEXT: [[X228:%.*]] = shufflevector <16 x float> [[X227]], <16 x float> poison, <16 x i32> +; CHECK-NEXT: [[X229:%.*]] = fadd <16 x float> [[X227]], [[X228]] +; CHECK-NEXT: [[X230:%.*]] = extractelement <16 x float> [[X229]], i64 0 +; CHECK-NEXT: ret float [[X230]] +; %x226 = shufflevector <16 x float> %x225, <16 x float> undef, <16 x i32> %x227 = fadd <16 x float> %x225, %x226 %x228 = shufflevector <16 x float> %x227, <16 x float> undef, <16 x i32> @@ -1911,6 +2399,15 @@ define float @hadd32_4_optsize(<4 x float> %x225) optsize { ; AVX-NEXT: vaddps %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vhaddps %xmm0, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define float @hadd32_4_optsize( +; CHECK-SAME: <4 x float> [[X225:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] { +; CHECK-NEXT: [[X226:%.*]] = shufflevector <4 x float> [[X225]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[X227:%.*]] = fadd <4 x float> [[X225]], [[X226]] +; CHECK-NEXT: [[X228:%.*]] = shufflevector <4 x float> [[X227]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[X229:%.*]] = fadd <4 x float> [[X227]], [[X228]] +; CHECK-NEXT: [[X230:%.*]] = extractelement <4 x float> [[X229]], i64 0 +; CHECK-NEXT: ret float [[X230]] +; %x226 = shufflevector <4 x float> %x225, <4 x float> undef, <4 x i32> %x227 = fadd <4 x float> %x225, %x226 %x228 = shufflevector <4 x float> %x227, <4 x float> undef, <4 x i32> @@ -1935,6 +2432,15 @@ define float @hadd32_8_optsize(<8 x float> %x225) optsize { ; AVX-NEXT: vhaddps %xmm0, %xmm0, %xmm0 ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq +; CHECK-LABEL: define float @hadd32_8_optsize( +; CHECK-SAME: <8 x float> [[X225:%.*]]) local_unnamed_addr #[[ATTR2]] { +; CHECK-NEXT: [[X226:%.*]] = shufflevector <8 x float> [[X225]], <8 x float> poison, <8 x i32> +; CHECK-NEXT: [[X227:%.*]] = fadd <8 x float> [[X225]], [[X226]] +; CHECK-NEXT: [[X228:%.*]] = shufflevector <8 x float> [[X227]], <8 x float> poison, <8 x i32> +; CHECK-NEXT: [[X229:%.*]] = fadd <8 x float> [[X227]], [[X228]] +; CHECK-NEXT: [[X230:%.*]] = extractelement <8 x float> [[X229]], i64 0 +; CHECK-NEXT: ret float [[X230]] +; %x226 = shufflevector <8 x float> %x225, <8 x float> undef, <8 x i32> %x227 = fadd <8 x float> %x225, %x226 %x228 = shufflevector <8 x float> %x227, <8 x float> undef, <8 x i32> @@ -1959,6 +2465,15 @@ define float @hadd32_16_optsize(<16 x float> %x225) optsize { ; AVX-NEXT: vhaddps %xmm0, %xmm0, %xmm0 ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq +; CHECK-LABEL: define float @hadd32_16_optsize( +; CHECK-SAME: <16 x float> [[X225:%.*]]) local_unnamed_addr #[[ATTR2]] { +; CHECK-NEXT: [[X226:%.*]] = shufflevector <16 x float> [[X225]], <16 x float> poison, <16 x i32> +; CHECK-NEXT: [[X227:%.*]] = fadd <16 x float> [[X225]], [[X226]] +; CHECK-NEXT: [[X228:%.*]] = shufflevector <16 x float> [[X227]], <16 x float> poison, <16 x i32> +; CHECK-NEXT: [[X229:%.*]] = fadd <16 x float> [[X227]], [[X228]] +; CHECK-NEXT: [[X230:%.*]] = extractelement <16 x float> [[X229]], i64 0 +; CHECK-NEXT: ret float [[X230]] +; %x226 = shufflevector <16 x float> %x225, <16 x float> undef, <16 x i32> %x227 = fadd <16 x float> %x225, %x226 %x228 = shufflevector <16 x float> %x227, <16 x float> undef, <16 x i32> @@ -1982,6 +2497,15 @@ define float @hadd32_4_pgso(<4 x float> %x225) !prof !14 { ; AVX-NEXT: vaddps %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vhaddps %xmm0, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define float @hadd32_4_pgso( +; CHECK-SAME: <4 x float> [[X225:%.*]]) local_unnamed_addr #[[ATTR0]] !prof [[PROF14:![0-9]+]] { +; CHECK-NEXT: [[X226:%.*]] = shufflevector <4 x float> [[X225]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[X227:%.*]] = fadd <4 x float> [[X225]], [[X226]] +; CHECK-NEXT: [[X228:%.*]] = shufflevector <4 x float> [[X227]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[X229:%.*]] = fadd <4 x float> [[X227]], [[X228]] +; CHECK-NEXT: [[X230:%.*]] = extractelement <4 x float> [[X229]], i64 0 +; CHECK-NEXT: ret float [[X230]] +; %x226 = shufflevector <4 x float> %x225, <4 x float> undef, <4 x i32> %x227 = fadd <4 x float> %x225, %x226 %x228 = shufflevector <4 x float> %x227, <4 x float> undef, <4 x i32> @@ -2006,6 +2530,15 @@ define float @hadd32_8_pgso(<8 x float> %x225) !prof !14 { ; AVX-NEXT: vhaddps %xmm0, %xmm0, %xmm0 ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq +; CHECK-LABEL: define float @hadd32_8_pgso( +; CHECK-SAME: <8 x float> [[X225:%.*]]) local_unnamed_addr #[[ATTR0]] !prof [[PROF14]] { +; CHECK-NEXT: [[X226:%.*]] = shufflevector <8 x float> [[X225]], <8 x float> poison, <8 x i32> +; CHECK-NEXT: [[X227:%.*]] = fadd <8 x float> [[X225]], [[X226]] +; CHECK-NEXT: [[X228:%.*]] = shufflevector <8 x float> [[X227]], <8 x float> poison, <8 x i32> +; CHECK-NEXT: [[X229:%.*]] = fadd <8 x float> [[X227]], [[X228]] +; CHECK-NEXT: [[X230:%.*]] = extractelement <8 x float> [[X229]], i64 0 +; CHECK-NEXT: ret float [[X230]] +; %x226 = shufflevector <8 x float> %x225, <8 x float> undef, <8 x i32> %x227 = fadd <8 x float> %x225, %x226 %x228 = shufflevector <8 x float> %x227, <8 x float> undef, <8 x i32> @@ -2030,6 +2563,15 @@ define float @hadd32_16_pgso(<16 x float> %x225) !prof !14 { ; AVX-NEXT: vhaddps %xmm0, %xmm0, %xmm0 ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq +; CHECK-LABEL: define float @hadd32_16_pgso( +; CHECK-SAME: <16 x float> [[X225:%.*]]) local_unnamed_addr #[[ATTR0]] !prof [[PROF14]] { +; CHECK-NEXT: [[X226:%.*]] = shufflevector <16 x float> [[X225]], <16 x float> poison, <16 x i32> +; CHECK-NEXT: [[X227:%.*]] = fadd <16 x float> [[X225]], [[X226]] +; CHECK-NEXT: [[X228:%.*]] = shufflevector <16 x float> [[X227]], <16 x float> poison, <16 x i32> +; CHECK-NEXT: [[X229:%.*]] = fadd <16 x float> [[X227]], [[X228]] +; CHECK-NEXT: [[X230:%.*]] = extractelement <16 x float> [[X229]], i64 0 +; CHECK-NEXT: ret float [[X230]] +; %x226 = shufflevector <16 x float> %x225, <16 x float> undef, <16 x i32> %x227 = fadd <16 x float> %x225, %x226 %x228 = shufflevector <16 x float> %x227, <16 x float> undef, <16 x i32> @@ -2071,6 +2613,15 @@ define float @partial_reduction_fadd_v8f32(<8 x float> %x) { ; AVX-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm0 ; AVX-FAST-NEXT: vzeroupper ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define float @partial_reduction_fadd_v8f32( +; CHECK-SAME: <8 x float> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[X23:%.*]] = shufflevector <8 x float> [[X]], <8 x float> poison, <8 x i32> +; CHECK-NEXT: [[X0213:%.*]] = fadd <8 x float> [[X]], [[X23]] +; CHECK-NEXT: [[X13:%.*]] = shufflevector <8 x float> [[X0213]], <8 x float> poison, <8 x i32> +; CHECK-NEXT: [[X0123:%.*]] = fadd reassoc nsz <8 x float> [[X13]], [[X0213]] +; CHECK-NEXT: [[R:%.*]] = extractelement <8 x float> [[X0123]], i64 0 +; CHECK-NEXT: ret float [[R]] +; %x23 = shufflevector <8 x float> %x, <8 x float> undef, <8 x i32> %x0213 = fadd <8 x float> %x, %x23 %x13 = shufflevector <8 x float> %x0213, <8 x float> undef, <8 x i32> @@ -2116,6 +2667,15 @@ define float @partial_reduction_fadd_v8f32_wrong_flags(<8 x float> %x) { ; AVX-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm0 ; AVX-FAST-NEXT: vzeroupper ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define float @partial_reduction_fadd_v8f32_wrong_flags( +; CHECK-SAME: <8 x float> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[X23:%.*]] = shufflevector <8 x float> [[X]], <8 x float> poison, <8 x i32> +; CHECK-NEXT: [[X0213:%.*]] = fadd fast <8 x float> [[X23]], [[X]] +; CHECK-NEXT: [[X13:%.*]] = shufflevector <8 x float> [[X0213]], <8 x float> poison, <8 x i32> +; CHECK-NEXT: [[X0123:%.*]] = fadd nnan ninf <8 x float> [[X0213]], [[X13]] +; CHECK-NEXT: [[R:%.*]] = extractelement <8 x float> [[X0123]], i64 0 +; CHECK-NEXT: ret float [[R]] +; %x23 = shufflevector <8 x float> %x, <8 x float> undef, <8 x i32> %x0213 = fadd fast <8 x float> %x, %x23 %x13 = shufflevector <8 x float> %x0213, <8 x float> undef, <8 x i32> @@ -2157,6 +2717,15 @@ define float @partial_reduction_fadd_v16f32(<16 x float> %x) { ; AVX-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm0 ; AVX-FAST-NEXT: vzeroupper ; AVX-FAST-NEXT: retq +; CHECK-LABEL: define float @partial_reduction_fadd_v16f32( +; CHECK-SAME: <16 x float> [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[X23:%.*]] = shufflevector <16 x float> [[X]], <16 x float> poison, <16 x i32> +; CHECK-NEXT: [[X0213:%.*]] = fadd <16 x float> [[X]], [[X23]] +; CHECK-NEXT: [[X13:%.*]] = shufflevector <16 x float> [[X0213]], <16 x float> poison, <16 x i32> +; CHECK-NEXT: [[X0123:%.*]] = fadd reassoc nsz <16 x float> [[X13]], [[X0213]] +; CHECK-NEXT: [[R:%.*]] = extractelement <16 x float> [[X0123]], i64 0 +; CHECK-NEXT: ret float [[R]] +; %x23 = shufflevector <16 x float> %x, <16 x float> undef, <16 x i32> %x0213 = fadd <16 x float> %x, %x23 %x13 = shufflevector <16 x float> %x0213, <16 x float> undef, <16 x i32> @@ -2181,3 +2750,6 @@ define float @partial_reduction_fadd_v16f32(<16 x float> %x) { !12 = !{i32 999000, i64 100, i32 1} !13 = !{i32 999999, i64 1, i32 2} !14 = !{!"function_entry_count", i64 0} +;. +; CHECK: [[PROF14]] = !{!"function_entry_count", i64 0} +;. diff --git a/llvm/test/CodeGen/X86/phaddsub-undef.ll b/llvm/test/Transforms/PhaseOrdering/X86/phaddsub-undef.ll similarity index 53% rename from llvm/test/CodeGen/X86/phaddsub-undef.ll rename to llvm/test/Transforms/PhaseOrdering/X86/phaddsub-undef.ll index 8aa40939994fd..3cfd1b797209c 100644 --- a/llvm/test/CodeGen/X86/phaddsub-undef.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/phaddsub-undef.ll @@ -1,14 +1,6 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE,SSE-SLOW -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ssse3,fast-hops | FileCheck %s --check-prefixes=SSE,SSE-FAST -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1,AVX1-SLOW -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx,fast-hops | FileCheck %s --check-prefixes=AVX,AVX1,AVX1-FAST -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2 -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2,fast-hops | FileCheck %s --check-prefixes=AVX,AVX2 -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512vl | FileCheck %s --check-prefixes=AVX,AVX512 -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512vl,fast-hops | FileCheck %s --check-prefixes=AVX,AVX512 +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -passes="default" -S %s | FileCheck %s -; Verify that we correctly fold horizontal binop even in the presence of UNDEFs. define <8 x i32> @test14_undef(<8 x i32> %a, <8 x i32> %b) { ; SSE-LABEL: test14_undef: @@ -20,6 +12,16 @@ define <8 x i32> @test14_undef(<8 x i32> %a, <8 x i32> %b) { ; AVX: # %bb.0: ; AVX-NEXT: vphaddd %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <8 x i32> @test14_undef( +; CHECK-SAME: <8 x i32> [[A:%.*]], <8 x i32> [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = add <8 x i32> [[A]], [[SHIFT]] +; CHECK-NEXT: [[VECINIT:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> , <8 x i32> +; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = add <8 x i32> [[SHIFT1]], [[B]] +; CHECK-NEXT: [[VECINIT5:%.*]] = shufflevector <8 x i32> [[VECINIT]], <8 x i32> [[TMP2]], <8 x i32> +; CHECK-NEXT: ret <8 x i32> [[VECINIT5]] +; %vecext = extractelement <8 x i32> %a, i32 0 %vecext1 = extractelement <8 x i32> %a, i32 1 %add = add i32 %vecext, %vecext1 @@ -87,6 +89,16 @@ define <8 x i32> @test15_undef(<8 x i32> %a, <8 x i32> %b) { ; AVX512: # %bb.0: ; AVX512-NEXT: vphaddd %ymm1, %ymm0, %ymm0 ; AVX512-NEXT: retq +; CHECK-LABEL: define <8 x i32> @test15_undef( +; CHECK-SAME: <8 x i32> [[A:%.*]], <8 x i32> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = add <8 x i32> [[A]], [[SHIFT]] +; CHECK-NEXT: [[VECINIT:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> , <8 x i32> +; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = add <8 x i32> [[B]], [[SHIFT1]] +; CHECK-NEXT: [[VECINIT5:%.*]] = shufflevector <8 x i32> [[VECINIT]], <8 x i32> [[TMP2]], <8 x i32> +; CHECK-NEXT: ret <8 x i32> [[VECINIT5]] +; %vecext = extractelement <8 x i32> %a, i32 0 %vecext1 = extractelement <8 x i32> %a, i32 1 %add = add i32 %vecext, %vecext1 @@ -121,6 +133,16 @@ define <8 x i32> @PR40243_alt(<8 x i32> %a, <8 x i32> %b) { ; AVX512: # %bb.0: ; AVX512-NEXT: vphaddd %ymm1, %ymm0, %ymm0 ; AVX512-NEXT: retq +; CHECK-LABEL: define <8 x i32> @PR40243_alt( +; CHECK-SAME: <8 x i32> [[A:%.*]], <8 x i32> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = add <8 x i32> [[A]], [[SHIFT]] +; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = add <8 x i32> [[SHIFT1]], [[B]] +; CHECK-NEXT: [[R4:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> , <8 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i32> [[R4]], <8 x i32> [[TMP2]], <8 x i32> +; CHECK-NEXT: ret <8 x i32> [[R]] +; %a4 = extractelement <8 x i32> %a, i32 4 %a5 = extractelement <8 x i32> %a, i32 5 %add4 = add i32 %a4, %a5 @@ -142,6 +164,16 @@ define <8 x i32> @test16_undef(<8 x i32> %a, <8 x i32> %b) { ; AVX: # %bb.0: ; AVX-NEXT: vphaddd %xmm0, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <8 x i32> @test16_undef( +; CHECK-SAME: <8 x i32> [[A:%.*]], <8 x i32> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = add <8 x i32> [[A]], [[SHIFT]] +; CHECK-NEXT: [[VECINIT:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> , <8 x i32> +; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = add <8 x i32> [[A]], [[SHIFT1]] +; CHECK-NEXT: [[VECINIT5:%.*]] = shufflevector <8 x i32> [[VECINIT]], <8 x i32> [[TMP2]], <8 x i32> +; CHECK-NEXT: ret <8 x i32> [[VECINIT5]] +; %vecext = extractelement <8 x i32> %a, i32 0 %vecext1 = extractelement <8 x i32> %a, i32 1 %add = add i32 %vecext, %vecext1 @@ -163,6 +195,16 @@ define <16 x i32> @test16_v16i32_undef(<16 x i32> %a, <16 x i32> %b) { ; AVX: # %bb.0: ; AVX-NEXT: vphaddd %xmm0, %xmm0, %xmm0 ; AVX-NEXT: retq +; CHECK-LABEL: define <16 x i32> @test16_v16i32_undef( +; CHECK-SAME: <16 x i32> [[A:%.*]], <16 x i32> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <16 x i32> [[A]], <16 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = add <16 x i32> [[A]], [[SHIFT]] +; CHECK-NEXT: [[VECINIT:%.*]] = shufflevector <16 x i32> [[TMP1]], <16 x i32> , <16 x i32> +; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <16 x i32> [[A]], <16 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = add <16 x i32> [[A]], [[SHIFT1]] +; CHECK-NEXT: [[VECINIT5:%.*]] = shufflevector <16 x i32> [[VECINIT]], <16 x i32> [[TMP2]], <16 x i32> +; CHECK-NEXT: ret <16 x i32> [[VECINIT5]] +; %vecext = extractelement <16 x i32> %a, i32 0 %vecext1 = extractelement <16 x i32> %a, i32 1 %add = add i32 %vecext, %vecext1 @@ -197,6 +239,14 @@ define <8 x i32> @test17_undef(<8 x i32> %a, <8 x i32> %b) { ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX512-NEXT: vphaddd %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: retq +; CHECK-LABEL: define <8 x i32> @test17_undef( +; CHECK-SAME: <8 x i32> [[A:%.*]], <8 x i32> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> undef, <8 x i32> +; CHECK-NEXT: ret <8 x i32> [[TMP4]] +; %vecext = extractelement <8 x i32> %a, i32 0 %vecext1 = extractelement <8 x i32> %a, i32 1 %add1 = add i32 %vecext, %vecext1 @@ -239,6 +289,14 @@ define <16 x i32> @test17_v16i32_undef(<16 x i32> %a, <16 x i32> %b) { ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX512-NEXT: vphaddd %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: retq +; CHECK-LABEL: define <16 x i32> @test17_v16i32_undef( +; CHECK-SAME: <16 x i32> [[A:%.*]], <16 x i32> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i32> [[A]], <16 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i32> [[A]], <16 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> undef, <16 x i32> +; CHECK-NEXT: ret <16 x i32> [[TMP4]] +; %vecext = extractelement <16 x i32> %a, i32 0 %vecext1 = extractelement <16 x i32> %a, i32 1 %add1 = add i32 %vecext, %vecext1 From 3d12ab451a1ba436419b1074e5292196fcf13e3e Mon Sep 17 00:00:00 2001 From: william Date: Sat, 19 Jul 2025 11:43:28 +0800 Subject: [PATCH 2/2] [DAG] Add m_SelectCCLike matcher and update test cases --- llvm/include/llvm/CodeGen/SDPatternMatch.h | 15 +- .../llvm/CodeGen/SelectionDAGISelMatchers.h | 53 +++++ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 218 +++++++++++++----- llvm/test/CodeGen/RISCV/rv32zbb.ll | 26 ++- llvm/test/CodeGen/RISCV/rv64zbb.ll | 32 ++- 5 files changed, 265 insertions(+), 79 deletions(-) create mode 100644 llvm/include/llvm/CodeGen/SelectionDAGISelMatchers.h diff --git a/llvm/include/llvm/CodeGen/SDPatternMatch.h b/llvm/include/llvm/CodeGen/SDPatternMatch.h index 2967532226197..d20b5d80670e2 100644 --- a/llvm/include/llvm/CodeGen/SDPatternMatch.h +++ b/llvm/include/llvm/CodeGen/SDPatternMatch.h @@ -93,7 +93,8 @@ struct Value_match { explicit Value_match(SDValue Match) : MatchVal(Match) {} - template bool match(const MatchContext &, SDValue N) { + template + bool match(const MatchContext &, SDValue N) const { if (MatchVal) return MatchVal == N; return N.getNode(); @@ -130,7 +131,8 @@ struct DeferredValue_match { explicit DeferredValue_match(SDValue &Match) : MatchVal(Match) {} - template bool match(const MatchContext &, SDValue N) { + template + bool match(const MatchContext &, SDValue N) const { return N == MatchVal; } }; @@ -196,7 +198,8 @@ struct Value_bind { explicit Value_bind(SDValue &N) : BindVal(N) {} - template bool match(const MatchContext &, SDValue N) { + template + bool match(const MatchContext &, SDValue N) const { BindVal = N; return true; } @@ -975,8 +978,7 @@ template inline UnaryOpc_match m_BitCast(const Opnd &Op) { return UnaryOpc_match(ISD::BITCAST, Op); } -template -inline UnaryOpc_match m_BSwap(const Opnd &Op) { +template inline UnaryOpc_match m_BSwap(const Opnd &Op) { return UnaryOpc_match(ISD::BSWAP, Op); } @@ -1203,7 +1205,8 @@ struct CondCode_match { explicit CondCode_match(ISD::CondCode *CC) : BindCC(CC) {} - template bool match(const MatchContext &, SDValue N) { + template + bool match(const MatchContext &, SDValue N) const { if (auto *CC = dyn_cast(N.getNode())) { if (CCToMatch && *CCToMatch != CC->get()) return false; diff --git a/llvm/include/llvm/CodeGen/SelectionDAGISelMatchers.h b/llvm/include/llvm/CodeGen/SelectionDAGISelMatchers.h new file mode 100644 index 0000000000000..926aca0bed904 --- /dev/null +++ b/llvm/include/llvm/CodeGen/SelectionDAGISelMatchers.h @@ -0,0 +1,53 @@ +namespace llvm { +namespace SDPatternMatch { + +// 1. 定義 SelectCC_match +template +struct SelectCC_match { + const LTy &L; + const RTy &R; + const TTy &T; + const FTy &F; + const CCTy &CC; + + SelectCC_match(const LTy &l, const RTy &r, + const TTy &t, const FTy &f, + const CCTy &cc) + : L(l), R(r), T(t), F(f), CC(cc) {} + + template + bool match(OpTy V) const { + if (V.getOpcode() != ISD::SELECT_CC) + return false; + + return L.match(V.getOperand(0)) && + R.match(V.getOperand(1)) && + T.match(V.getOperand(2)) && + F.match(V.getOperand(3)) && + CC.match(cast(V.getOperand(4))->get()); + } +}; + +// 2. 定義 m_SelectCC +template +inline SelectCC_match +m_SelectCC(const LTy &L, const RTy &R, + const TTy &T, const FTy &F, + const CCTy &CC) { + return SelectCC_match(L, R, T, F, CC); +} + +// 3. 定義 m_SelectCCLike +template +inline auto m_SelectCCLike(const LTy &L, const RTy &R, + const TTy &T, const FTy &F, + const CCTy &CC) { + return m_AnyOf( + m_Select(m_SetCC(L, R, CC), T, F), + m_SelectCC(L, R, T, F, CC) + ); +} + +} // namespace SDPatternMatch +} // namespace llvm + diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index fed5e7238433e..9a83847bb01c5 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -264,6 +264,47 @@ namespace { VT.getSizeInBits().getKnownMinValue() >= MaximumLegalStoreInBits) MaximumLegalStoreInBits = VT.getSizeInBits().getKnownMinValue(); } + + template + struct SelectCC_match { + LTy L; + RTy R; + TTy T; + FTy F; + CCTy CC; + + SelectCC_match(LTy L, RTy R, TTy T, FTy F, CCTy CC) + : L(std::move(L)), R(std::move(R)), T(std::move(T)), F(std::move(F)), + CC(std::move(CC)) {} + + template + bool match(const MatchContext &Ctx, SDValue V) const { + return V.getOpcode() == ISD::SELECT_CC && L.match(Ctx, V.getOperand(0)) && + R.match(Ctx, V.getOperand(1)) && T.match(Ctx, V.getOperand(2)) && + F.match(Ctx, V.getOperand(3)) && CC.match(Ctx, V.getOperand(4)); + } + }; + + template + inline auto m_SelectCC(LTy &&L, RTy &&R, TTy &&T, FTy &&F, CCTy &&CC) { + return SelectCC_match, std::decay_t, + std::decay_t, std::decay_t, + std::decay_t>( + std::forward(L), std::forward(R), std::forward(T), + std::forward(F), std::forward(CC)); + } + + template + inline auto m_SelectCCLike(LTy &&L, RTy &&R, TTy &&T, FTy &&F, CCTy &&CC) { + return SDPatternMatch::m_AnyOf( + SDPatternMatch::m_Select(SDPatternMatch::m_SetCC(L, R, CC), T, F), + m_SelectCC(std::forward(L), std::forward(R), + std::forward(T), std::forward(F), + std::forward(CC))); + } void ConsiderForPruning(SDNode *N) { // Mark this for potential pruning. @@ -640,6 +681,7 @@ namespace { SDValue mergeInsertEltWithShuffle(SDNode *N, unsigned InsIndex); SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex); SDValue combineInsertEltToLoad(SDNode *N, unsigned InsIndex); + SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT); SDValue BuildSDIV(SDNode *N); SDValue BuildSDIVPow2(SDNode *N); SDValue BuildUDIV(SDNode *N); @@ -2608,7 +2650,9 @@ SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) { return SDValue(); } - return DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF, BO->getFlags()); + SDValue SelectOp = DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF); + SelectOp->setFlags(BO->getFlags()); + return SelectOp; } static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, const SDLoc &DL, @@ -4300,8 +4344,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { return V; // (A - B) - 1 -> add (xor B, -1), A - if (sd_match(N, m_Sub(m_OneUse(m_Sub(m_Value(A), m_Value(B))), - m_One(/*AllowUndefs=*/true)))) + if (sd_match(N, m_Sub(m_OneUse(m_Sub(m_Value(A), m_Value(B))), m_One()))) return DAG.getNode(ISD::ADD, DL, VT, A, DAG.getNOT(DL, B, VT)); // Look for: @@ -9155,7 +9198,7 @@ calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth, if (Op.getOpcode() != ISD::LOAD && VectorIndex.has_value()) return std::nullopt; - unsigned BitWidth = Op.getScalarValueSizeInBits(); + unsigned BitWidth = Op.getValueSizeInBits(); if (BitWidth % 8 != 0) return std::nullopt; unsigned ByteWidth = BitWidth / 8; @@ -9254,7 +9297,7 @@ calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth, if (!L->isSimple() || L->isIndexed()) return std::nullopt; - unsigned NarrowBitWidth = L->getMemoryVT().getScalarSizeInBits(); + unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits(); if (NarrowBitWidth % 8 != 0) return std::nullopt; uint64_t NarrowByteWidth = NarrowBitWidth / 8; @@ -9908,14 +9951,11 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { if (SDValue Combined = visitADDLike(N)) return Combined; - // fold not (setcc x, y, cc) -> setcc x y !cc - // Avoid breaking: and (not(setcc x, y, cc), z) -> andn for vec + // fold !(x cc y) -> (x !cc y) unsigned N0Opcode = N0.getOpcode(); SDValue LHS, RHS, CC; if (TLI.isConstTrueVal(N1) && - isSetCCEquivalent(N0, LHS, RHS, CC, /*MatchStrict*/ true) && - !(VT.isVector() && TLI.hasAndNot(SDValue(N, 0)) && N->hasOneUse() && - N->use_begin()->getUser()->getOpcode() == ISD::AND)) { + isSetCCEquivalent(N0, LHS, RHS, CC, /*MatchStrict*/ true)) { ISD::CondCode NotCC = ISD::getSetCCInverse(cast(CC)->get(), LHS.getValueType()); if (!LegalOperations || @@ -12221,8 +12261,11 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { return V; // select (not Cond), N1, N2 -> select Cond, N2, N1 - if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false)) - return DAG.getSelect(DL, VT, F, N2, N1, Flags); + if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false)) { + SDValue SelectOp = DAG.getSelect(DL, VT, F, N2, N1); + SelectOp->setFlags(Flags); + return SelectOp; + } if (SDValue V = foldSelectOfConstants(N)) return V; @@ -13102,10 +13145,10 @@ static SDValue combineVSelectWithAllOnesOrZeros(SDValue Cond, SDValue TVal, EVT CondVT = Cond.getValueType(); assert(CondVT.isVector() && "Vector select expects a vector selector!"); - bool IsTAllZero = ISD::isConstantSplatVectorAllZeros(TVal.getNode()); - bool IsTAllOne = ISD::isConstantSplatVectorAllOnes(TVal.getNode()); - bool IsFAllZero = ISD::isConstantSplatVectorAllZeros(FVal.getNode()); - bool IsFAllOne = ISD::isConstantSplatVectorAllOnes(FVal.getNode()); + bool IsTAllZero = ISD::isBuildVectorAllZeros(TVal.getNode()); + bool IsTAllOne = ISD::isBuildVectorAllOnes(TVal.getNode()); + bool IsFAllZero = ISD::isBuildVectorAllZeros(FVal.getNode()); + bool IsFAllOne = ISD::isBuildVectorAllOnes(FVal.getNode()); // no vselect(cond, 0/-1, X) or vselect(cond, X, 0/-1), return if (!IsTAllZero && !IsTAllOne && !IsFAllZero && !IsFAllOne) @@ -13179,15 +13222,6 @@ static SDValue combineVSelectWithAllOnesOrZeros(SDValue Cond, SDValue TVal, return DAG.getBitcast(VT, And); } - // select Cond, 0, x -> and not(Cond), x - if (IsTAllZero && - (isBitwiseNot(peekThroughBitcasts(Cond)) || TLI.hasAndNot(Cond))) { - SDValue X = DAG.getBitcast(CondVT, FVal); - SDValue And = - DAG.getNode(ISD::AND, DL, CondVT, DAG.getNOT(DL, Cond, CondVT), X); - return DAG.getBitcast(VT, And); - } - return SDValue(); } @@ -13205,9 +13239,8 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) { return V; // vselect (not Cond), N1, N2 -> vselect Cond, N2, N1 - if (!TLI.isTargetCanonicalSelect(N)) - if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false)) - return DAG.getSelect(DL, VT, F, N2, N1); + if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false)) + return DAG.getSelect(DL, VT, F, N2, N1); // select (sext m), (add X, C), X --> (add X, (and C, (sext m)))) if (N1.getOpcode() == ISD::ADD && N1.getOperand(0) == N2 && N1->hasOneUse() && @@ -13506,9 +13539,11 @@ SDValue DAGCombiner::visitSELECT_CC(SDNode *N) { // Fold to a simpler select_cc if (SCC.getOpcode() == ISD::SETCC) { - return DAG.getNode(ISD::SELECT_CC, DL, N2.getValueType(), - SCC.getOperand(0), SCC.getOperand(1), N2, N3, - SCC.getOperand(2), SCC->getFlags()); + SDValue SelectOp = + DAG.getNode(ISD::SELECT_CC, DL, N2.getValueType(), SCC.getOperand(0), + SCC.getOperand(1), N2, N3, SCC.getOperand(2)); + SelectOp->setFlags(SCC->getFlags()); + return SelectOp; } } @@ -16458,8 +16493,8 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { TLI.isTypeLegal(VT.getVectorElementType()))) && N0.getOpcode() == ISD::BUILD_VECTOR && N0->hasOneUse() && cast(N0)->isConstant()) - return DAG.FoldConstantBuildVector(cast(N0), SDLoc(N), - VT.getVectorElementType()); + return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(), + VT.getVectorElementType()); // If the input is a constant, let getNode fold it. if (isIntOrFPConstant(N0)) { @@ -16848,6 +16883,83 @@ SDValue DAGCombiner::visitFREEZE(SDNode *N) { return DAG.getNode(N0.getOpcode(), DL, N0->getVTList(), Ops, SafeFlags); } +/// We know that BV is a build_vector node with Constant, ConstantFP or Undef +/// operands. DstEltVT indicates the destination element value type. +SDValue DAGCombiner:: +ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { + EVT SrcEltVT = BV->getValueType(0).getVectorElementType(); + + // If this is already the right type, we're done. + if (SrcEltVT == DstEltVT) return SDValue(BV, 0); + + unsigned SrcBitSize = SrcEltVT.getSizeInBits(); + unsigned DstBitSize = DstEltVT.getSizeInBits(); + + // If this is a conversion of N elements of one type to N elements of another + // type, convert each element. This handles FP<->INT cases. + if (SrcBitSize == DstBitSize) { + SmallVector Ops; + for (SDValue Op : BV->op_values()) { + // If the vector element type is not legal, the BUILD_VECTOR operands + // are promoted and implicitly truncated. Make that explicit here. + if (Op.getValueType() != SrcEltVT) + Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op); + Ops.push_back(DAG.getBitcast(DstEltVT, Op)); + AddToWorklist(Ops.back().getNode()); + } + EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, + BV->getValueType(0).getVectorNumElements()); + return DAG.getBuildVector(VT, SDLoc(BV), Ops); + } + + // Otherwise, we're growing or shrinking the elements. To avoid having to + // handle annoying details of growing/shrinking FP values, we convert them to + // int first. + if (SrcEltVT.isFloatingPoint()) { + // Convert the input float vector to a int vector where the elements are the + // same sizes. + EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits()); + BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode(); + SrcEltVT = IntVT; + } + + // Now we know the input is an integer vector. If the output is a FP type, + // convert to integer first, then to FP of the right size. + if (DstEltVT.isFloatingPoint()) { + EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits()); + SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode(); + + // Next, convert to FP elements of the same size. + return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT); + } + + // Okay, we know the src/dst types are both integers of differing types. + assert(SrcEltVT.isInteger() && DstEltVT.isInteger()); + + // TODO: Should ConstantFoldBITCASTofBUILD_VECTOR always take a + // BuildVectorSDNode? + auto *BVN = cast(BV); + + // Extract the constant raw bit data. + BitVector UndefElements; + SmallVector RawBits; + bool IsLE = DAG.getDataLayout().isLittleEndian(); + if (!BVN->getConstantRawBits(IsLE, DstBitSize, RawBits, UndefElements)) + return SDValue(); + + SDLoc DL(BV); + SmallVector Ops; + for (unsigned I = 0, E = RawBits.size(); I != E; ++I) { + if (UndefElements[I]) + Ops.push_back(DAG.getUNDEF(DstEltVT)); + else + Ops.push_back(DAG.getConstant(RawBits[I], DL, DstEltVT)); + } + + EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size()); + return DAG.getBuildVector(VT, DL, Ops); +} + // Returns true if floating point contraction is allowed on the FMUL-SDValue // `N` static bool isContractableFMUL(const TargetOptions &Options, SDValue N) { @@ -27643,11 +27755,6 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) { if (DAG.isConstantValueOfAnyType(N1.getOperand(0)) || N1.hasOneUse()) return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, N1.getOperand(0)); - // insert_subvector (splat X), (splat X), N2 -> splat X - if (N0.getOpcode() == ISD::SPLAT_VECTOR && N0.getOpcode() == N1.getOpcode() && - N0.getOperand(0) == N1.getOperand(0)) - return N0; - // If we are inserting a bitcast value into an undef, with the same // number of elements, just use the bitcast input of the extract. // i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 -> @@ -28216,16 +28323,14 @@ SDValue DAGCombiner::SimplifyVCastOp(SDNode *N, const SDLoc &DL) { TLI.preferScalarizeSplat(N)) { EVT SrcVT = N0.getValueType(); EVT SrcEltVT = SrcVT.getVectorElementType(); - if (!LegalTypes || TLI.isTypeLegal(SrcEltVT)) { - SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL); - SDValue Elt = - DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, SrcEltVT, Src0, IndexC); - SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, Elt, N->getFlags()); - if (VT.isScalableVector()) - return DAG.getSplatVector(VT, DL, ScalarBO); - SmallVector Ops(VT.getVectorNumElements(), ScalarBO); - return DAG.getBuildVector(VT, DL, Ops); - } + SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL); + SDValue Elt = + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, SrcEltVT, Src0, IndexC); + SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, Elt, N->getFlags()); + if (VT.isScalableVector()) + return DAG.getSplatVector(VT, DL, ScalarBO); + SmallVector Ops(VT.getVectorNumElements(), ScalarBO); + return DAG.getBuildVector(VT, DL, Ops); } return SDValue(); @@ -28367,8 +28472,10 @@ SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SCC.getOperand(0), SCC.getOperand(1), SCC.getOperand(4), Flags); AddToWorklist(SETCC.getNode()); - return DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC, - SCC.getOperand(2), SCC.getOperand(3), Flags); + SDValue SelectNode = DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC, + SCC.getOperand(2), SCC.getOperand(3)); + SelectNode->setFlags(Flags); + return SelectNode; } return SCC; @@ -28669,9 +28776,9 @@ SDValue DAGCombiner::foldSelectOfBinops(SDNode *N) { SDValue N10 = N1.getOperand(0); SDValue N20 = N2.getOperand(0); SDValue NewSel = DAG.getSelect(DL, N10.getValueType(), N0, N10, N20); - SDNodeFlags Flags = N1->getFlags() & N2->getFlags(); - SDValue NewBinOp = - DAG.getNode(BinOpc, DL, OpVTs, {NewSel, N1.getOperand(1)}, Flags); + SDValue NewBinOp = DAG.getNode(BinOpc, DL, OpVTs, NewSel, N1.getOperand(1)); + NewBinOp->setFlags(N1->getFlags()); + NewBinOp->intersectFlagsWith(N2->getFlags()); return SDValue(NewBinOp.getNode(), N1.getResNo()); } @@ -28683,9 +28790,10 @@ SDValue DAGCombiner::foldSelectOfBinops(SDNode *N) { // Second op VT might be different (e.g. shift amount type) if (N11.getValueType() == N21.getValueType()) { SDValue NewSel = DAG.getSelect(DL, N11.getValueType(), N0, N11, N21); - SDNodeFlags Flags = N1->getFlags() & N2->getFlags(); SDValue NewBinOp = - DAG.getNode(BinOpc, DL, OpVTs, {N1.getOperand(0), NewSel}, Flags); + DAG.getNode(BinOpc, DL, OpVTs, N1.getOperand(0), NewSel); + NewBinOp->setFlags(N1->getFlags()); + NewBinOp->intersectFlagsWith(N2->getFlags()); return SDValue(NewBinOp.getNode(), N1.getResNo()); } } diff --git a/llvm/test/CodeGen/RISCV/rv32zbb.ll b/llvm/test/CodeGen/RISCV/rv32zbb.ll index 8dd63015971d0..40db4943aedcf 100644 --- a/llvm/test/CodeGen/RISCV/rv32zbb.ll +++ b/llvm/test/CodeGen/RISCV/rv32zbb.ll @@ -1649,14 +1649,24 @@ define i128 @sub_if_uge_i128(i128 %x, i128 %y) { } define i32 @sub_if_uge_multiuse_select_i32(i32 %x, i32 %y) { -; CHECK-LABEL: sub_if_uge_multiuse_select_i32: -; CHECK: # %bb.0: -; CHECK-NEXT: sltu a2, a0, a1 -; CHECK-NEXT: addi a2, a2, -1 -; CHECK-NEXT: and a1, a2, a1 -; CHECK-NEXT: sub a0, a0, a1 -; CHECK-NEXT: sll a0, a0, a1 -; CHECK-NEXT: ret +; RV32I-LABEL: sub_if_uge_multiuse_select_i32: +; RV32I: # %bb.0: +; RV32I-NEXT: sltu a2, a0, a1 +; RV32I-NEXT: addi a2, a2, -1 +; RV32I-NEXT: and a1, a2, a1 +; RV32I-NEXT: sub a0, a0, a1 +; RV32I-NEXT: sll a0, a0, a1 +; RV32I-NEXT: ret +; +; RV32ZBB-LABEL: sub_if_uge_multiuse_select_i32: +; RV32ZBB: # %bb.0: +; RV32ZBB-NEXT: sltu a2, a0, a1 +; RV32ZBB-NEXT: addi a2, a2, -1 +; RV32ZBB-NEXT: and a2, a2, a1 +; RV32ZBB-NEXT: sub a1, a0, a1 +; RV32ZBB-NEXT: minu a0, a0, a1 +; RV32ZBB-NEXT: sll a0, a0, a2 +; RV32ZBB-NEXT: ret %cmp = icmp ult i32 %x, %y %select = select i1 %cmp, i32 0, i32 %y %sub = sub nuw i32 %x, %select diff --git a/llvm/test/CodeGen/RISCV/rv64zbb.ll b/llvm/test/CodeGen/RISCV/rv64zbb.ll index e6407279870db..dc78ae1b998da 100644 --- a/llvm/test/CodeGen/RISCV/rv64zbb.ll +++ b/llvm/test/CodeGen/RISCV/rv64zbb.ll @@ -1845,16 +1845,28 @@ define i128 @sub_if_uge_i128(i128 %x, i128 %y) { } define i32 @sub_if_uge_multiuse_select_i32(i32 %x, i32 %y) { -; CHECK-LABEL: sub_if_uge_multiuse_select_i32: -; CHECK: # %bb.0: -; CHECK-NEXT: sext.w a2, a1 -; CHECK-NEXT: sext.w a3, a0 -; CHECK-NEXT: sltu a2, a3, a2 -; CHECK-NEXT: addi a2, a2, -1 -; CHECK-NEXT: and a1, a2, a1 -; CHECK-NEXT: subw a0, a0, a1 -; CHECK-NEXT: sllw a0, a0, a1 -; CHECK-NEXT: ret +; RV64I-LABEL: sub_if_uge_multiuse_select_i32: +; RV64I: # %bb.0: +; RV64I-NEXT: sext.w a2, a1 +; RV64I-NEXT: sext.w a3, a0 +; RV64I-NEXT: sltu a2, a3, a2 +; RV64I-NEXT: addi a2, a2, -1 +; RV64I-NEXT: and a1, a2, a1 +; RV64I-NEXT: subw a0, a0, a1 +; RV64I-NEXT: sllw a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64ZBB-LABEL: sub_if_uge_multiuse_select_i32: +; RV64ZBB: # %bb.0: +; RV64ZBB-NEXT: sext.w a2, a1 +; RV64ZBB-NEXT: sext.w a3, a0 +; RV64ZBB-NEXT: subw a0, a0, a1 +; RV64ZBB-NEXT: sltu a2, a3, a2 +; RV64ZBB-NEXT: addi a2, a2, -1 +; RV64ZBB-NEXT: and a1, a2, a1 +; RV64ZBB-NEXT: minu a0, a3, a0 +; RV64ZBB-NEXT: sllw a0, a0, a1 +; RV64ZBB-NEXT: ret %cmp = icmp ult i32 %x, %y %select = select i1 %cmp, i32 0, i32 %y %sub = sub nuw i32 %x, %select