[InstCombine] Fold insert sequence if first ins has multiple users.

Summary: If the first insertelement instruction has multiple users and inserts at position 0, we can reuse this instruction when folding a chain of insertelement instructions. As we need to generate the first insertelement instruction anyway, this should be a strict improvement. We could lift the restriction of inserting at position 0 by creating a different shuffle mask, but it is probably worth keeping the first insertelement instruction at position 0, as I think this is easier to do efficiently than at other positions. Reviewers: grosser, mkuper, fpetrogalli, efriedma Reviewed By: fpetrogalli Subscribers: gareevroman, llvm-commits Differential Revision: https://reviews.llvm.org/D37064 llvm-svn: 312110
2025-01-12 19:01:55 +00:00 · 2017-08-30 10:54:21 +00:00 · 2017-08-30 10:54:21 +00:00 · b992feee13
commit b992feee13
parent 1a7c369e08
2 changed files with 46 additions and 6 deletions
--- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@ -615,6 +615,7 @@ static Instruction *foldInsSequenceIntoBroadcast(InsertElementInst &InsElt) {
  Value *SplatVal = InsElt.getOperand(1);
  InsertElementInst *CurrIE = &InsElt;  
  SmallVector<bool, 16> ElementPresent(NumElements, false);
+  InsertElementInst *FirstIE = nullptr;

  // Walk the chain backwards, keeping track of which indices we inserted into,
  // until we hit something that isn't an insert of the splatted value.
@ -623,12 +624,18 @@ static Instruction *foldInsSequenceIntoBroadcast(InsertElementInst &InsElt) {
    if (!Idx || CurrIE->getOperand(1) != SplatVal)
      return nullptr;

-    // Check none of the intermediate steps have any additional uses.
-    if ((CurrIE != &InsElt) && !CurrIE->hasOneUse())
+    InsertElementInst *NextIE =
+      dyn_cast<InsertElementInst>(CurrIE->getOperand(0));
+    // Check none of the intermediate steps have any additional uses, except
+    // for the root insertelement instruction, which can be re-used, if it
+    // inserts at position 0.
+    if (CurrIE != &InsElt &&
+        (!CurrIE->hasOneUse() && (NextIE != nullptr || !Idx->isZero())))
      return nullptr;

    ElementPresent[Idx->getZExtValue()] = true;
-    CurrIE = dyn_cast<InsertElementInst>(CurrIE->getOperand(0));
+    FirstIE = CurrIE;
+    CurrIE = NextIE;
  }

  // Make sure we've seen an insert into every element.
@ -636,9 +643,14 @@ static Instruction *foldInsSequenceIntoBroadcast(InsertElementInst &InsElt) {
    return nullptr;

  // All right, create the insert + shuffle.
-  Instruction *InsertFirst = InsertElementInst::Create(
-      UndefValue::get(VT), SplatVal,
-      ConstantInt::get(Type::getInt32Ty(InsElt.getContext()), 0), "", &InsElt);
+  Instruction *InsertFirst;
+  if (cast<ConstantInt>(FirstIE->getOperand(2))->isZero())
+    InsertFirst = FirstIE;
+  else
+    InsertFirst = InsertElementInst::Create(
+        UndefValue::get(VT), SplatVal,
+        ConstantInt::get(Type::getInt32Ty(InsElt.getContext()), 0),
+        "", &InsElt);

  Constant *ZeroMask = ConstantAggregateZero::get(
      VectorType::get(Type::getInt32Ty(InsElt.getContext()), NumElements));
--- a/llvm/test/Transforms/InstCombine/broadcast.ll
+++ b/llvm/test/Transforms/InstCombine/broadcast.ll
@ -51,6 +51,22 @@ define <4 x float> @good4(float %arg) {
  ret <4 x float> %tmp7
 }

+; CHECK-LABEL: @good5(
+; CHECK-NEXT:    %ins1 = insertelement <4 x float> undef, float %v, i32 0
+; CHECK-NEXT:    %a1 = fadd <4 x float> %ins1, %ins1
+; CHECK-NEXT:    %ins4 = shufflevector <4 x float> %ins1, <4 x float> undef, <4 x i32> zeroinitializer
+; CHECK-NEXT:    %res = fadd <4 x float> %a1, %ins4
+; CHECK-NEXT: ret <4 x float> %res
+define <4 x float> @good5(float %v) {
+  %ins1 = insertelement <4 x float> undef, float %v, i32 0
+  %a1 = fadd <4 x float> %ins1, %ins1
+  %ins2 = insertelement<4 x float> %ins1, float %v, i32 1
+  %ins3 = insertelement<4 x float> %ins2, float %v, i32 2
+  %ins4 = insertelement<4 x float> %ins3, float %v, i32 3
+  %res = fadd <4 x float> %a1, %ins4
+  ret <4 x float> %res
+}
+
 ; CHECK-LABEL: bad1
 ; CHECK-NOT: shufflevector
 define <4 x float> @bad1(float %arg) {
@ -107,3 +123,15 @@ define <4 x float> @bad6(float %arg, i32 %k) {
  %tmp6 = insertelement <4 x float> %tmp5, float %arg, i32 3
  ret <4 x float> %tmp6
 }
+
+; CHECK-LABEL: @bad7(
+; CHECK-NOT: shufflevector
+define <4 x float> @bad7(float %v) {
+  %ins1 = insertelement <4 x float> undef, float %v, i32 1
+  %a1 = fadd <4 x float> %ins1, %ins1
+  %ins2 = insertelement<4 x float> %ins1, float %v, i32 2
+  %ins3 = insertelement<4 x float> %ins2, float %v, i32 3
+  %ins4 = insertelement<4 x float> %ins3, float %v, i32 0
+  %res = fadd <4 x float> %a1, %ins4
+  ret <4 x float> %res
+}