@@ -107,10 +107,8 @@ class VectorCombine {
107
107
const Instruction &I,
108
108
ExtractElementInst *&ConvertToShuffle,
109
109
unsigned PreferredExtractIndex);
110
- void foldExtExtCmp (ExtractElementInst *Ext0, ExtractElementInst *Ext1,
111
- Instruction &I);
112
- void foldExtExtBinop (ExtractElementInst *Ext0, ExtractElementInst *Ext1,
113
- Instruction &I);
110
+ Value *foldExtExtCmp (Value *V0, Value *V1, Value *ExtIndex, Instruction &I);
111
+ Value *foldExtExtBinop (Value *V0, Value *V1, Value *ExtIndex, Instruction &I);
114
112
bool foldExtractExtract (Instruction &I);
115
113
bool foldInsExtFNeg (Instruction &I);
116
114
bool foldInsExtBinop (Instruction &I);
@@ -138,7 +136,7 @@ class VectorCombine {
138
136
bool foldInterleaveIntrinsics (Instruction &I);
139
137
bool shrinkType (Instruction &I);
140
138
141
- void replaceValue (Value &Old, Value &New) {
139
+ void replaceValue (Instruction &Old, Value &New, bool Erase = true ) {
142
140
LLVM_DEBUG (dbgs () << " VC: Replacing: " << Old << ' \n ' );
143
141
LLVM_DEBUG (dbgs () << " With: " << New << ' \n ' );
144
142
Old.replaceAllUsesWith (&New);
@@ -147,7 +145,11 @@ class VectorCombine {
147
145
Worklist.pushUsersToWorkList (*NewI);
148
146
Worklist.pushValue (NewI);
149
147
}
150
- Worklist.pushValue (&Old);
148
+ if (Erase && isInstructionTriviallyDead (&Old)) {
149
+ eraseInstruction (Old);
150
+ } else {
151
+ Worklist.push (&Old);
152
+ }
151
153
}
152
154
153
155
void eraseInstruction (Instruction &I) {
@@ -158,11 +160,23 @@ class VectorCombine {
158
160
159
161
// Push remaining users of the operands and then the operand itself - allows
160
162
// further folds that were hindered by OneUse limits.
161
- for (Value *Op : Ops)
162
- if (auto *OpI = dyn_cast<Instruction>(Op)) {
163
- Worklist.pushUsersToWorkList (*OpI);
164
- Worklist.pushValue (OpI);
163
+ SmallPtrSet<Value *, 4 > Visited;
164
+ for (Value *Op : Ops) {
165
+ if (Visited.insert (Op).second ) {
166
+ if (auto *OpI = dyn_cast<Instruction>(Op)) {
167
+ if (RecursivelyDeleteTriviallyDeadInstructions (
168
+ OpI, nullptr , nullptr , [this ](Value *V) {
169
+ if (auto I = dyn_cast<Instruction>(V)) {
170
+ LLVM_DEBUG (dbgs () << " VC: Erased: " << *I << ' \n ' );
171
+ Worklist.remove (I);
172
+ }
173
+ }))
174
+ continue ;
175
+ Worklist.pushUsersToWorkList (*OpI);
176
+ Worklist.pushValue (OpI);
177
+ }
165
178
}
179
+ }
166
180
}
167
181
};
168
182
} // namespace
@@ -546,9 +560,8 @@ static Value *createShiftShuffle(Value *Vec, unsigned OldIndex,
546
560
// / the source vector (shift the scalar element) to a NewIndex for extraction.
547
561
// / Return null if the input can be constant folded, so that we are not creating
548
562
// / unnecessary instructions.
549
- static ExtractElementInst *translateExtract (ExtractElementInst *ExtElt,
550
- unsigned NewIndex,
551
- IRBuilderBase &Builder) {
563
+ static Value *translateExtract (ExtractElementInst *ExtElt, unsigned NewIndex,
564
+ IRBuilderBase &Builder) {
552
565
// Shufflevectors can only be created for fixed-width vectors.
553
566
Value *X = ExtElt->getVectorOperand ();
554
567
if (!isa<FixedVectorType>(X->getType ()))
@@ -563,52 +576,41 @@ static ExtractElementInst *translateExtract(ExtractElementInst *ExtElt,
563
576
564
577
Value *Shuf = createShiftShuffle (X, cast<ConstantInt>(C)->getZExtValue (),
565
578
NewIndex, Builder);
566
- return dyn_cast<ExtractElementInst>(
567
- Builder.CreateExtractElement (Shuf, NewIndex));
579
+ return Shuf;
568
580
}
569
581
570
582
// / Try to reduce extract element costs by converting scalar compares to vector
571
583
// / compares followed by extract.
572
584
// / cmp (ext0 V0, C), (ext1 V1, C)
573
- void VectorCombine::foldExtExtCmp (ExtractElementInst *Ext0 ,
574
- ExtractElementInst *Ext1, Instruction &I) {
585
+ Value * VectorCombine::foldExtExtCmp (Value *V0, Value *V1, Value *ExtIndex ,
586
+ Instruction &I) {
575
587
assert (isa<CmpInst>(&I) && " Expected a compare" );
576
- assert (cast<ConstantInt>(Ext0->getIndexOperand ())->getZExtValue () ==
577
- cast<ConstantInt>(Ext1->getIndexOperand ())->getZExtValue () &&
578
- " Expected matching constant extract indexes" );
579
588
580
589
// cmp Pred (extelt V0, C), (extelt V1, C) --> extelt (cmp Pred V0, V1), C
581
590
++NumVecCmp;
582
591
CmpInst::Predicate Pred = cast<CmpInst>(&I)->getPredicate ();
583
- Value *V0 = Ext0->getVectorOperand (), *V1 = Ext1->getVectorOperand ();
584
592
Value *VecCmp = Builder.CreateCmp (Pred, V0, V1);
585
- Value *NewExt = Builder.CreateExtractElement (VecCmp, Ext0->getIndexOperand ());
586
- replaceValue (I, *NewExt);
593
+ return Builder.CreateExtractElement (VecCmp, ExtIndex, " foldExtExtCmp" );
587
594
}
588
595
589
596
// / Try to reduce extract element costs by converting scalar binops to vector
590
597
// / binops followed by extract.
591
598
// / bo (ext0 V0, C), (ext1 V1, C)
592
- void VectorCombine::foldExtExtBinop (ExtractElementInst *Ext0 ,
593
- ExtractElementInst *Ext1, Instruction &I) {
599
+ Value * VectorCombine::foldExtExtBinop (Value *V0, Value *V1, Value *ExtIndex ,
600
+ Instruction &I) {
594
601
assert (isa<BinaryOperator>(&I) && " Expected a binary operator" );
595
- assert (cast<ConstantInt>(Ext0->getIndexOperand ())->getZExtValue () ==
596
- cast<ConstantInt>(Ext1->getIndexOperand ())->getZExtValue () &&
597
- " Expected matching constant extract indexes" );
598
602
599
603
// bo (extelt V0, C), (extelt V1, C) --> extelt (bo V0, V1), C
600
604
++NumVecBO;
601
- Value *V0 = Ext0->getVectorOperand (), *V1 = Ext1->getVectorOperand ();
602
- Value *VecBO =
603
- Builder.CreateBinOp (cast<BinaryOperator>(&I)->getOpcode (), V0, V1);
605
+ Value *VecBO = Builder.CreateBinOp (cast<BinaryOperator>(&I)->getOpcode (), V0,
606
+ V1, " foldExtExtBinop" );
604
607
605
608
// All IR flags are safe to back-propagate because any potential poison
606
609
// created in unused vector elements is discarded by the extract.
607
610
if (auto *VecBOInst = dyn_cast<Instruction>(VecBO))
608
611
VecBOInst->copyIRFlags (&I);
609
612
610
- Value *NewExt = Builder.CreateExtractElement (VecBO, Ext0->getIndexOperand ());
611
- replaceValue (I, *NewExt);
613
+ return Builder.CreateExtractElement (VecBO, ExtIndex, " foldExtExtBinop" );
612
614
}
613
615
614
616
// / Match an instruction with extracted vector operands.
@@ -647,25 +649,29 @@ bool VectorCombine::foldExtractExtract(Instruction &I) {
647
649
if (isExtractExtractCheap (Ext0, Ext1, I, ExtractToChange, InsertIndex))
648
650
return false ;
649
651
652
+ Value *ExtOp0 = Ext0->getVectorOperand ();
653
+ Value *ExtOp1 = Ext1->getVectorOperand ();
654
+
650
655
if (ExtractToChange) {
651
656
unsigned CheapExtractIdx = ExtractToChange == Ext0 ? C1 : C0;
652
- ExtractElementInst *NewExtract =
657
+ Value *NewExtOp =
653
658
translateExtract (ExtractToChange, CheapExtractIdx, Builder);
654
- if (!NewExtract )
659
+ if (!NewExtOp )
655
660
return false ;
656
661
if (ExtractToChange == Ext0)
657
- Ext0 = NewExtract ;
662
+ ExtOp0 = NewExtOp ;
658
663
else
659
- Ext1 = NewExtract ;
664
+ ExtOp1 = NewExtOp ;
660
665
}
661
666
662
- if (Pred != CmpInst::BAD_ICMP_PREDICATE )
663
- foldExtExtCmp (Ext0, Ext1, I );
664
- else
665
- foldExtExtBinop (Ext0, Ext1, I);
666
-
667
+ Value *ExtIndex = ExtractToChange == Ext0 ? Ext1-> getIndexOperand ( )
668
+ : Ext0-> getIndexOperand ( );
669
+ Value *NewExt = Pred != CmpInst::BAD_ICMP_PREDICATE
670
+ ? foldExtExtCmp (ExtOp0, ExtOp1, ExtIndex, I)
671
+ : foldExtExtBinop (ExtOp0, ExtOp1, ExtIndex, I);
667
672
Worklist.push (Ext0);
668
673
Worklist.push (Ext1);
674
+ replaceValue (I, *NewExt);
669
675
return true ;
670
676
}
671
677
@@ -1824,7 +1830,7 @@ bool VectorCombine::scalarizeLoadExtract(Instruction &I) {
1824
1830
LI->getAlign (), VecTy->getElementType (), Idx, *DL);
1825
1831
NewLoad->setAlignment (ScalarOpAlignment);
1826
1832
1827
- replaceValue (*EI, *NewLoad);
1833
+ replaceValue (*EI, *NewLoad, false );
1828
1834
}
1829
1835
1830
1836
FailureGuard.release ();
@@ -2910,7 +2916,7 @@ bool VectorCombine::foldShuffleToIdentity(Instruction &I) {
2910
2916
if (!IL.first )
2911
2917
return true ;
2912
2918
Value *V = IL.first ->get ();
2913
- if (auto *I = dyn_cast<Instruction>(V); I && !I->hasOneUse ())
2919
+ if (auto *I = dyn_cast<Instruction>(V); I && !I->hasOneUser ())
2914
2920
return false ;
2915
2921
if (V->getValueID () != FrontV->getValueID ())
2916
2922
return false ;
@@ -3112,7 +3118,7 @@ bool VectorCombine::foldShuffleFromReductions(Instruction &I) {
3112
3118
Shuffle->getOperand (0 ), Shuffle->getOperand (1 ), ConcatMask);
3113
3119
LLVM_DEBUG (dbgs () << " Created new shuffle: " << *NewShuffle << " \n " );
3114
3120
replaceValue (*Shuffle, *NewShuffle);
3115
- MadeChanges = true ;
3121
+ return true ;
3116
3122
}
3117
3123
3118
3124
// See if we can re-use foldSelectShuffle, getting it to reduce the size of
@@ -3608,7 +3614,7 @@ bool VectorCombine::foldSelectShuffle(Instruction &I, bool FromReduction) {
3608
3614
for (int S = 0 , E = ReconstructMasks.size (); S != E; S++) {
3609
3615
Builder.SetInsertPoint (Shuffles[S]);
3610
3616
Value *NSV = Builder.CreateShuffleVector (NOp0, NOp1, ReconstructMasks[S]);
3611
- replaceValue (*Shuffles[S], *NSV);
3617
+ replaceValue (*Shuffles[S], *NSV, false );
3612
3618
}
3613
3619
3614
3620
Worklist.pushValue (NSV0A);
@@ -3873,8 +3879,7 @@ bool VectorCombine::run() {
3873
3879
3874
3880
LLVM_DEBUG (dbgs () << " \n\n VECTORCOMBINE on " << F.getName () << " \n " );
3875
3881
3876
- bool MadeChange = false ;
3877
- auto FoldInst = [this , &MadeChange](Instruction &I) {
3882
+ auto FoldInst = [this ](Instruction &I) {
3878
3883
Builder.SetInsertPoint (&I);
3879
3884
bool IsVectorType = isa<VectorType>(I.getType ());
3880
3885
bool IsFixedVectorType = isa<FixedVectorType>(I.getType ());
@@ -3889,10 +3894,12 @@ bool VectorCombine::run() {
3889
3894
if (IsFixedVectorType) {
3890
3895
switch (Opcode) {
3891
3896
case Instruction::InsertElement:
3892
- MadeChange |= vectorizeLoadInsert (I);
3897
+ if (vectorizeLoadInsert (I))
3898
+ return true ;
3893
3899
break ;
3894
3900
case Instruction::ShuffleVector:
3895
- MadeChange |= widenSubvectorLoad (I);
3901
+ if (widenSubvectorLoad (I))
3902
+ return true ;
3896
3903
break ;
3897
3904
default :
3898
3905
break ;
@@ -3902,19 +3909,25 @@ bool VectorCombine::run() {
3902
3909
// This transform works with scalable and fixed vectors
3903
3910
// TODO: Identify and allow other scalable transforms
3904
3911
if (IsVectorType) {
3905
- MadeChange |= scalarizeOpOrCmp (I);
3906
- MadeChange |= scalarizeLoadExtract (I);
3907
- MadeChange |= scalarizeExtExtract (I);
3908
- MadeChange |= scalarizeVPIntrinsic (I);
3909
- MadeChange |= foldInterleaveIntrinsics (I);
3912
+ if (scalarizeOpOrCmp (I))
3913
+ return true ;
3914
+ if (scalarizeLoadExtract (I))
3915
+ return true ;
3916
+ if (scalarizeExtExtract (I))
3917
+ return true ;
3918
+ if (scalarizeVPIntrinsic (I))
3919
+ return true ;
3920
+ if (foldInterleaveIntrinsics (I))
3921
+ return true ;
3910
3922
}
3911
3923
3912
3924
if (Opcode == Instruction::Store)
3913
- MadeChange |= foldSingleElementStore (I);
3925
+ if (foldSingleElementStore (I))
3926
+ return true ;
3914
3927
3915
3928
// If this is an early pipeline invocation of this pass, we are done.
3916
3929
if (TryEarlyFoldsOnly)
3917
- return ;
3930
+ return false ;
3918
3931
3919
3932
// Otherwise, try folds that improve codegen but may interfere with
3920
3933
// early IR canonicalizations.
@@ -3923,72 +3936,91 @@ bool VectorCombine::run() {
3923
3936
if (IsFixedVectorType) {
3924
3937
switch (Opcode) {
3925
3938
case Instruction::InsertElement:
3926
- MadeChange |= foldInsExtFNeg (I);
3927
- MadeChange |= foldInsExtBinop (I);
3928
- MadeChange |= foldInsExtVectorToShuffle (I);
3939
+ if (foldInsExtFNeg (I))
3940
+ return true ;
3941
+ if (foldInsExtBinop (I))
3942
+ return true ;
3943
+ if (foldInsExtVectorToShuffle (I))
3944
+ return true ;
3929
3945
break ;
3930
3946
case Instruction::ShuffleVector:
3931
- MadeChange |= foldPermuteOfBinops (I);
3932
- MadeChange |= foldShuffleOfBinops (I);
3933
- MadeChange |= foldShuffleOfSelects (I);
3934
- MadeChange |= foldShuffleOfCastops (I);
3935
- MadeChange |= foldShuffleOfShuffles (I);
3936
- MadeChange |= foldShuffleOfIntrinsics (I);
3937
- MadeChange |= foldSelectShuffle (I);
3938
- MadeChange |= foldShuffleToIdentity (I);
3947
+ if (foldPermuteOfBinops (I))
3948
+ return true ;
3949
+ if (foldShuffleOfBinops (I))
3950
+ return true ;
3951
+ if (foldShuffleOfSelects (I))
3952
+ return true ;
3953
+ if (foldShuffleOfCastops (I))
3954
+ return true ;
3955
+ if (foldShuffleOfShuffles (I))
3956
+ return true ;
3957
+ if (foldShuffleOfIntrinsics (I))
3958
+ return true ;
3959
+ if (foldSelectShuffle (I))
3960
+ return true ;
3961
+ if (foldShuffleToIdentity (I))
3962
+ return true ;
3939
3963
break ;
3940
3964
case Instruction::BitCast:
3941
- MadeChange |= foldBitcastShuffle (I);
3965
+ if (foldBitcastShuffle (I))
3966
+ return true ;
3942
3967
break ;
3943
3968
case Instruction::And:
3944
3969
case Instruction::Or:
3945
3970
case Instruction::Xor:
3946
- MadeChange |= foldBitOpOfCastops (I);
3971
+ if (foldBitOpOfCastops (I))
3972
+ return true ;
3947
3973
break ;
3948
3974
default :
3949
- MadeChange |= shrinkType (I);
3975
+ if (shrinkType (I))
3976
+ return true ;
3950
3977
break ;
3951
3978
}
3952
3979
} else {
3953
3980
switch (Opcode) {
3954
3981
case Instruction::Call:
3955
- MadeChange |= foldShuffleFromReductions (I);
3956
- MadeChange |= foldCastFromReductions (I);
3982
+ if (foldShuffleFromReductions (I))
3983
+ return true ;
3984
+ if (foldCastFromReductions (I))
3985
+ return true ;
3957
3986
break ;
3958
3987
case Instruction::ICmp:
3959
3988
case Instruction::FCmp:
3960
- MadeChange |= foldExtractExtract (I);
3989
+ if (foldExtractExtract (I))
3990
+ return true ;
3961
3991
break ;
3962
3992
case Instruction::Or:
3963
- MadeChange |= foldConcatOfBoolMasks (I);
3993
+ if (foldConcatOfBoolMasks (I))
3994
+ return true ;
3964
3995
[[fallthrough]];
3965
3996
default :
3966
3997
if (Instruction::isBinaryOp (Opcode)) {
3967
- MadeChange |= foldExtractExtract (I);
3968
- MadeChange |= foldExtractedCmps (I);
3969
- MadeChange |= foldBinopOfReductions (I);
3998
+ if (foldExtractExtract (I))
3999
+ return true ;
4000
+ if (foldExtractedCmps (I))
4001
+ return true ;
4002
+ if (foldBinopOfReductions (I))
4003
+ return true ;
3970
4004
}
3971
4005
break ;
3972
4006
}
3973
4007
}
4008
+ return false ;
3974
4009
};
3975
4010
3976
- SmallVector<Instruction*, 128 > InstrsForInstructionWorklist ;
4011
+ bool MadeChange = false ;
3977
4012
for (BasicBlock &BB : F) {
3978
4013
// Ignore unreachable basic blocks.
3979
4014
if (!DT.isReachableFromEntry (&BB))
3980
4015
continue ;
3981
- for (Instruction &I : BB) {
4016
+ // Use early increment range so that we can erase instructions in loop.
4017
+ for (Instruction &I : make_early_inc_range (BB)) {
3982
4018
if (I.isDebugOrPseudoInst ())
3983
4019
continue ;
3984
- InstrsForInstructionWorklist. push_back (& I);
4020
+ MadeChange |= FoldInst ( I);
3985
4021
}
3986
4022
}
3987
4023
3988
- Worklist.reserve (InstrsForInstructionWorklist.size ());
3989
- for (auto I : reverse (InstrsForInstructionWorklist))
3990
- Worklist.push (I);
3991
-
3992
4024
while (!Worklist.isEmpty ()) {
3993
4025
Instruction *I = Worklist.removeOne ();
3994
4026
if (!I)
@@ -3999,7 +4031,7 @@ bool VectorCombine::run() {
3999
4031
continue ;
4000
4032
}
4001
4033
4002
- FoldInst (*I);
4034
+ MadeChange |= FoldInst (*I);
4003
4035
}
4004
4036
4005
4037
return MadeChange;
0 commit comments