[InstCombine] Fold insert sequence if first ins has multiple users.

Summary:
If the first insertelement instruction has multiple users and inserts at
position 0, we can re-use this instruction when folding a chain of
insertelement instructions. As we need to generate the first
insertelement instruction anyway, this should be a strict improvement.

We could get rid of the restriction of inserting at position 0 by
creating a different shuffle mask, but it is probably worth keeping the
first insertelement instruction at position 0, as I think this is easier
to do efficiently than at other positions.

Reviewers: grosser, mkuper, fpetrogalli, efriedma

Reviewed By: fpetrogalli

Subscribers: gareevroman, llvm-commits

Differential Revision: https://reviews.llvm.org/D37064

llvm-svn: 312110
This commit is contained in:
Florian Hahn 2017-08-30 10:54:21 +00:00
parent 1a7c369e08
commit b992feee13
2 changed files with 46 additions and 6 deletions

View File

@ -615,6 +615,7 @@ static Instruction *foldInsSequenceIntoBroadcast(InsertElementInst &InsElt) {
Value *SplatVal = InsElt.getOperand(1);
InsertElementInst *CurrIE = &InsElt;
SmallVector<bool, 16> ElementPresent(NumElements, false);
InsertElementInst *FirstIE = nullptr;
// Walk the chain backwards, keeping track of which indices we inserted into,
// until we hit something that isn't an insert of the splatted value.
@ -623,12 +624,18 @@ static Instruction *foldInsSequenceIntoBroadcast(InsertElementInst &InsElt) {
if (!Idx || CurrIE->getOperand(1) != SplatVal)
return nullptr;
// Check none of the intermediate steps have any additional uses.
if ((CurrIE != &InsElt) && !CurrIE->hasOneUse())
InsertElementInst *NextIE =
dyn_cast<InsertElementInst>(CurrIE->getOperand(0));
// Check none of the intermediate steps have any additional uses, except
// for the root insertelement instruction, which can be re-used, if it
// inserts at position 0.
if (CurrIE != &InsElt &&
(!CurrIE->hasOneUse() && (NextIE != nullptr || !Idx->isZero())))
return nullptr;
ElementPresent[Idx->getZExtValue()] = true;
CurrIE = dyn_cast<InsertElementInst>(CurrIE->getOperand(0));
FirstIE = CurrIE;
CurrIE = NextIE;
}
// Make sure we've seen an insert into every element.
@ -636,9 +643,14 @@ static Instruction *foldInsSequenceIntoBroadcast(InsertElementInst &InsElt) {
return nullptr;
// All right, create the insert + shuffle.
Instruction *InsertFirst = InsertElementInst::Create(
Instruction *InsertFirst;
if (cast<ConstantInt>(FirstIE->getOperand(2))->isZero())
InsertFirst = FirstIE;
else
InsertFirst = InsertElementInst::Create(
UndefValue::get(VT), SplatVal,
ConstantInt::get(Type::getInt32Ty(InsElt.getContext()), 0), "", &InsElt);
ConstantInt::get(Type::getInt32Ty(InsElt.getContext()), 0),
"", &InsElt);
Constant *ZeroMask = ConstantAggregateZero::get(
VectorType::get(Type::getInt32Ty(InsElt.getContext()), NumElements));

View File

@ -51,6 +51,22 @@ define <4 x float> @good4(float %arg) {
ret <4 x float> %tmp7
}
; CHECK-LABEL: @good5(
; CHECK-NEXT: %ins1 = insertelement <4 x float> undef, float %v, i32 0
; CHECK-NEXT: %a1 = fadd <4 x float> %ins1, %ins1
; CHECK-NEXT: %ins4 = shufflevector <4 x float> %ins1, <4 x float> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: %res = fadd <4 x float> %a1, %ins4
; CHECK-NEXT: ret <4 x float> %res
; Positive case: the first insertelement (%ins1) writes lane 0 and has an
; additional user (%a1) outside the insert chain. The chain %ins1..%ins4
; writes the same scalar %v into all four lanes, so the test expects %ins1 to
; be kept as-is and %ins4 to become a zero-mask shuffle of %ins1.
define <4 x float> @good5(float %v) {
; Root of the chain: lane 0, inserted into undef.
%ins1 = insertelement <4 x float> undef, float %v, i32 0
; Extra use of %ins1, giving the root insert more than one user.
%a1 = fadd <4 x float> %ins1, %ins1
; Remaining lanes 1..3 are filled with the same scalar.
%ins2 = insertelement<4 x float> %ins1, float %v, i32 1
%ins3 = insertelement<4 x float> %ins2, float %v, i32 2
%ins4 = insertelement<4 x float> %ins3, float %v, i32 3
%res = fadd <4 x float> %a1, %ins4
ret <4 x float> %res
}
; CHECK-LABEL: bad1
; CHECK-NOT: shufflevector
define <4 x float> @bad1(float %arg) {
@ -107,3 +123,15 @@ define <4 x float> @bad6(float %arg, i32 %k) {
%tmp6 = insertelement <4 x float> %tmp5, float %arg, i32 3
ret <4 x float> %tmp6
}
; CHECK-LABEL: @bad7(
; CHECK-NOT: shufflevector
; Negative case: like the multi-user scenario above, the first insertelement
; (%ins1) has an additional user (%a1), but it writes lane 1 instead of lane 0.
; The chain still covers all four lanes (1, 2, 3, 0) with the same scalar, yet
; the test expects no shuffle to be produced for this function.
define <4 x float> @bad7(float %v) {
; Root of the chain: lane 1 (not lane 0), inserted into undef.
%ins1 = insertelement <4 x float> undef, float %v, i32 1
; Extra use of %ins1, giving the root insert more than one user.
%a1 = fadd <4 x float> %ins1, %ins1
%ins2 = insertelement<4 x float> %ins1, float %v, i32 2
%ins3 = insertelement<4 x float> %ins2, float %v, i32 3
; Lane 0 is written last.
%ins4 = insertelement<4 x float> %ins3, float %v, i32 0
%res = fadd <4 x float> %a1, %ins4
ret <4 x float> %res
}