Add intermediate subtract instructions to reassociation worklist.

We sometimes create intermediate subtract instructions during reassociation. Adding these to the worklist to revisit exposes many additional reassociation opportunities. Patch by Aditya Nandakumar. llvm-svn: 253240
2024-11-29 22:30:33 +00:00 · 2015-11-16 18:07:30 +00:00 · 2015-11-16 18:07:30 +00:00 · 4d5ef8fb85
commit 4d5ef8fb85
parent 723dbfed60
9 changed files with 87 additions and 26 deletions
--- a/lib/Transforms/Scalar/Reassociate.cpp
+++ b/lib/Transforms/Scalar/Reassociate.cpp
@ -881,7 +881,11 @@ void Reassociate::RewriteExprTree(BinaryOperator *I,
 /// that computes the negative version of the value specified.  The negative
 /// version of the value is returned, and BI is left pointing at the instruction
 /// that should be processed next by the reassociation pass.
-static Value *NegateValue(Value *V, Instruction *BI) {
+/// Also add intermediate instructions to the redo list that are modified while
+/// pushing the negates through adds.  These will be revisited to see if
+/// additional opportunities have been exposed.
+static Value *NegateValue(Value *V, Instruction *BI,
+                          SetVector<AssertingVH<Instruction>> &ToRedo) {
  if (Constant *C = dyn_cast<Constant>(V)) {
    if (C->getType()->isFPOrFPVectorTy()) {
      return ConstantExpr::getFNeg(C);
@ -902,8 +906,8 @@ static Value *NegateValue(Value *V, Instruction *BI) {
  if (BinaryOperator *I =
          isReassociableOp(V, Instruction::Add, Instruction::FAdd)) {
    // Push the negates through the add.
-    I->setOperand(0, NegateValue(I->getOperand(0), BI));
-    I->setOperand(1, NegateValue(I->getOperand(1), BI));
+    I->setOperand(0, NegateValue(I->getOperand(0), BI, ToRedo));
+    I->setOperand(1, NegateValue(I->getOperand(1), BI, ToRedo));
    if (I->getOpcode() == Instruction::Add) {
      I->setHasNoUnsignedWrap(false);
      I->setHasNoSignedWrap(false);
@ -916,6 +920,10 @@ static Value *NegateValue(Value *V, Instruction *BI) {
    //
    I->moveBefore(BI);
    I->setName(I->getName()+".neg");
+
+    // Add the intermediate negates to the redo list as processing them later
+    // could expose more reassociating opportunities.
+    ToRedo.insert(I);
    return I;
  }

@ -955,12 +963,15 @@ static Value *NegateValue(Value *V, Instruction *BI) {
    } else {
      TheNeg->andIRFlags(BI);
    }
+    ToRedo.insert(TheNeg);
    return TheNeg;
  }

  // Insert a 'neg' instruction that subtracts the value from zero to get the
  // negation.
-  return CreateNeg(V, V->getName() + ".neg", BI, BI);
+  BinaryOperator *NewNeg = CreateNeg(V, V->getName() + ".neg", BI, BI);
+  ToRedo.insert(NewNeg);
+  return NewNeg;
 }

 /// Return true if we should break up this subtract of X-Y into (X + -Y).
@ -994,14 +1005,15 @@ static bool ShouldBreakUpSubtract(Instruction *Sub) {

 /// If we have (X-Y), and if either X is an add, or if this is only used by an
 /// add, transform this into (X+(0-Y)) to promote better reassociation.
-static BinaryOperator *BreakUpSubtract(Instruction *Sub) {
+static BinaryOperator *
+BreakUpSubtract(Instruction *Sub, SetVector<AssertingVH<Instruction>> &ToRedo) {
  // Convert a subtract into an add and a neg instruction. This allows sub
  // instructions to be commuted with other add instructions.
  //
  // Calculate the negative value of Operand 1 of the sub instruction,
  // and set it as the RHS of the add instruction we just made.
  //
-  Value *NegVal = NegateValue(Sub->getOperand(1), Sub);
+  Value *NegVal = NegateValue(Sub->getOperand(1), Sub, ToRedo);
  BinaryOperator *New = CreateAdd(Sub->getOperand(0), NegVal, "", Sub, Sub);
  Sub->setOperand(0, Constant::getNullValue(Sub->getType())); // Drop use of op.
  Sub->setOperand(1, Constant::getNullValue(Sub->getType())); // Drop use of op.
@ -2068,7 +2080,7 @@ void Reassociate::OptimizeInst(Instruction *I) {
  // see if we can convert it to X+-Y.
  if (I->getOpcode() == Instruction::Sub) {
    if (ShouldBreakUpSubtract(I)) {
-      Instruction *NI = BreakUpSubtract(I);
+      Instruction *NI = BreakUpSubtract(I, RedoInsts);
      RedoInsts.insert(I);
      MadeChange = true;
      I = NI;
@ -2079,6 +2091,12 @@ void Reassociate::OptimizeInst(Instruction *I) {
          (!I->hasOneUse() ||
           !isReassociableOp(I->user_back(), Instruction::Mul))) {
        Instruction *NI = LowerNegateToMultiply(I);
+        // If the negate was simplified, revisit the users to see if we can
+        // reassociate further.
+        for (User *U : NI->users()) {
+          if (BinaryOperator *Tmp = dyn_cast<BinaryOperator>(U))
+            RedoInsts.insert(Tmp);
+        }
        RedoInsts.insert(I);
        MadeChange = true;
        I = NI;
@ -2086,7 +2104,7 @@ void Reassociate::OptimizeInst(Instruction *I) {
    }
  } else if (I->getOpcode() == Instruction::FSub) {
    if (ShouldBreakUpSubtract(I)) {
-      Instruction *NI = BreakUpSubtract(I);
+      Instruction *NI = BreakUpSubtract(I, RedoInsts);
      RedoInsts.insert(I);
      MadeChange = true;
      I = NI;
@ -2096,7 +2114,13 @@ void Reassociate::OptimizeInst(Instruction *I) {
      if (isReassociableOp(I->getOperand(1), Instruction::FMul) &&
          (!I->hasOneUse() ||
           !isReassociableOp(I->user_back(), Instruction::FMul))) {
+        // If the negate was simplified, revisit the users to see if we can
+        // reassociate further.
        Instruction *NI = LowerNegateToMultiply(I);
+        for (User *U : NI->users()) {
+          if (BinaryOperator *Tmp = dyn_cast<BinaryOperator>(U))
+            RedoInsts.insert(Tmp);
+        }
        RedoInsts.insert(I);
        MadeChange = true;
        I = NI;
@ -2111,8 +2135,14 @@ void Reassociate::OptimizeInst(Instruction *I) {
  // If this is an interior node of a reassociable tree, ignore it until we
  // get to the root of the tree, to avoid N^2 analysis.
  unsigned Opcode = BO->getOpcode();
-  if (BO->hasOneUse() && BO->user_back()->getOpcode() == Opcode)
+  if (BO->hasOneUse() && BO->user_back()->getOpcode() == Opcode) {
+    // During the initial run we will get to the root of the tree.
+    // But if we get here while we are redoing instructions, there is no
+    // guarantee that the root will be visited. So Redo later
+    if (BO->user_back() != BO)
+      RedoInsts.insert(BO->user_back());
    return;
+  }

  // If this is an add tree that is used by a sub instruction, ignore it
  // until we process the subtract.
--- a/test/Transforms/Reassociate/fast-ReassociateVector.ll
+++ b/test/Transforms/Reassociate/fast-ReassociateVector.ll
@ -16,9 +16,9 @@ define <4 x float> @test1(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
 ; Check that a*a*b+a*a*c is turned into a*(a*(b+c)).
 define <2 x float> @test2(<2 x float> %a, <2 x float> %b, <2 x float> %c) {
 ; CHECK-LABEL: @test2
-; CHECK-NEXT: fadd fast <2 x float> %c, %b
-; CHECK-NEXT: fmul fast <2 x float> %a, %tmp2
-; CHECK-NEXT: fmul fast <2 x float> %tmp3, %a
+; CHECK-NEXT: [[TMP1:%tmp.*]] = fadd fast <2 x float> %c, %b
+; CHECK-NEXT: [[TMP2:%tmp.*]] = fmul fast <2 x float> %a, %a
+; CHECK-NEXT: fmul fast <2 x float> [[TMP2]], [[TMP1]]
 ; CHECK-NEXT: ret <2 x float>

  %t0 = fmul fast <2 x float> %a, %b
@ -133,8 +133,8 @@ define <2 x float> @test10(<2 x float> %a, <2 x float> %b, <2 x float> %z) {
 ; Check x*y+y*x -> x*y*2.
 define <2 x double> @test11(<2 x double> %x, <2 x double> %y) {
 ; CHECK-LABEL: @test11
-; CHECK-NEXT: %factor = fmul fast <2 x double> %y, <double 2.000000e+00, double 2.000000e+00>
-; CHECK-NEXT: %tmp1 = fmul fast <2 x double> %factor, %x
+; CHECK-NEXT: %factor = fmul fast <2 x double> %x, <double 2.000000e+00, double 2.000000e+00>
+; CHECK-NEXT: %tmp1 = fmul fast <2 x double> %factor, %y
 ; CHECK-NEXT: ret <2 x double> %tmp1

  %1 = fmul fast <2 x double> %x, %y
--- a/test/Transforms/Reassociate/fast-basictest.ll
+++ b/test/Transforms/Reassociate/fast-basictest.ll
@ -108,7 +108,7 @@ define float @test7(float %A, float %B, float %C) {
 ; CHECK-LABEL: @test7
 ; CHECK-NEXT: fadd fast float %C, %B
 ; CHECK-NEXT: fmul fast float %A, %A
-; CHECK-NEXT: fmul fast float %1, %tmp2
+; CHECK-NEXT: fmul fast float %tmp3, %tmp2
 ; CHECK-NEXT: ret float

  %aa = fmul fast float %A, %A
--- a/test/Transforms/Reassociate/fast-fp-commute.ll
+++ b/test/Transforms/Reassociate/fast-fp-commute.ll
@ -33,8 +33,8 @@ define float @test2(float %x, float %y) {

 define float @test3(float %x, float %y) {
 ; CHECK-LABEL: test3
-; CHECK-NEXT: %factor = fmul fast float %y, 2.000000e+00
-; CHECK-NEXT: %tmp1 = fmul fast float %factor, %x
+; CHECK-NEXT: %factor = fmul fast float %x, 2.000000e+00
+; CHECK-NEXT: %tmp1 = fmul fast float %factor, %y
 ; CHECK-NEXT: ret float %tmp1

  %1 = fmul fast float %x, %y
--- a/test/Transforms/Reassociate/fast-multistep.ll
+++ b/test/Transforms/Reassociate/fast-multistep.ll
@ -3,9 +3,9 @@
 define float @fmultistep1(float %a, float %b, float %c) {
 ; Check that a*a*b+a*a*c is turned into a*(a*(b+c)).
 ; CHECK-LABEL: @fmultistep1
-; CHECK-NEXT: fadd fast float %c, %b
-; CHECK-NEXT: fmul fast float %a, %tmp2
-; CHECK-NEXT: fmul fast float %tmp3, %a
+; CHECK-NEXT: [[TMP1:%tmp.*]] = fadd fast float %c, %b
+; CHECK-NEXT: [[TMP2:%tmp.*]] = fmul fast float %a, %a
+; CHECK-NEXT: fmul fast float [[TMP2]], [[TMP1]]
 ; CHECK-NEXT: ret float

  %t0 = fmul fast float %a, %b
--- a/test/Transforms/Reassociate/multistep.ll
+++ b/test/Transforms/Reassociate/multistep.ll
@ -8,9 +8,9 @@ define i64 @multistep1(i64 %a, i64 %b, i64 %c) {
  %t2 = mul i64 %a, %c
  %t3 = mul i64 %a, %t2 ; a*(a*c)
  %t4 = add i64 %t1, %t3
-; CHECK-NEXT: add i64 %c, %b
-; CHECK-NEXT: mul i64 %a, %tmp{{.*}}
-; CHECK-NEXT: mul i64 %tmp{{.*}}, %a
+; CHECK-NEXT: [[TMP1:%tmp.*]] = add i64 %c, %b
+; CHECK-NEXT: [[TMP2:%tmp.*]] = mul i64 %a, %a
+; CHECK-NEXT: mul i64 [[TMP2]], [[TMP1]]
 ; CHECK-NEXT: ret
  ret i64 %t4
 }
--- a/test/Transforms/Reassociate/reassoc-intermediate-fnegs.ll
+++ b/test/Transforms/Reassociate/reassoc-intermediate-fnegs.ll
@ -0,0 +1,31 @@
+; RUN: opt < %s -reassociate -S | FileCheck %s
+; CHECK-LABEL: faddsubAssoc1
+; CHECK: [[TMP1:%tmp.*]] = fmul fast half %a, 0xH4500
+; CHECK: [[TMP2:%tmp.*]] = fmul fast half %b, 0xH4500
+; CHECK: fsub fast half [[TMP2]], [[TMP1]]
+; CHECK: ret
+; Input is A op (B op C)
+define half @faddsubAssoc1(half %a, half %b) {
+  %tmp1 = fmul fast half %b, 0xH4200 ; 3*b
+  %tmp2 = fmul fast half %a, 0xH4500 ; 5*a
+  %tmp3 = fmul fast half %b, 0xH4000 ; 2*b
+  %tmp4 = fsub fast half %tmp2, %tmp1 ; 5 * a - 3 * b
+  %tmp5 = fsub fast half %tmp3, %tmp4 ; 2 * b - ( 5 * a - 3 * b)
+  ret half %tmp5 ; = 5 * (b - a)
+}
+
+; CHECK-LABEL: faddsubAssoc2
+; CHECK: [[TMP1:%tmp.*]] = fmul fast half %a, 0xH4500
+; CHECK: [[TMP2:%tmp.*]] = fmul fast half %b, 0xH3C00
+; CHECK: fadd fast half [[TMP2]], [[TMP1]]
+; CHECK: ret
+; Input is (A op B) op C
+define half @faddsubAssoc2(half %a, half %b) {
+  %tmp1 = fmul fast half %b, 0xH4200 ; 3*b
+  %tmp2 = fmul fast half %a, 0xH4500 ; 5*a
+  %tmp3 = fmul fast half %b, 0xH4000 ; 2*b
+  %tmp4 = fadd fast half %tmp2, %tmp1 ; 5 * a + 3 * b
+  %tmp5 = fsub fast half %tmp4, %tmp3 ; (5 * a + 3 * b) - (2 * b)
+  ret half %tmp5 ; = 5 * a + b
+}
+
--- a/test/Transforms/Reassociate/secondary.ll
+++ b/test/Transforms/Reassociate/secondary.ll
@ -6,7 +6,7 @@

 ; CHECK:     define
 ; CHECK-NOT: undef
-; CHECK:     %factor = mul i32 %tmp3, -2
+; CHECK:     %factor = mul i32 %tmp3.neg, 2
 ; CHECK-NOT: undef
 ; CHECK:     }

--- a/test/Transforms/Reassociate/xor_reassoc.ll
+++ b/test/Transforms/Reassociate/xor_reassoc.ll
@ -88,8 +88,8 @@ define i32 @xor_special2(i32 %x, i32 %y) {
  %xor1 = xor i32 %xor, %and
  ret i32 %xor1
 ; CHECK-LABEL: @xor_special2(
-; CHECK: %xor = xor i32 %y, 123
-; CHECK: %xor1 = xor i32 %xor, %x
+; CHECK: %xor = xor i32 %x, 123
+; CHECK: %xor1 = xor i32 %xor, %y
 ; CHECK: ret i32 %xor1
 }