diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp
index f6c18d626cf..82c79e7d919 100644
--- a/lib/Transforms/Scalar/Reassociate.cpp
+++ b/lib/Transforms/Scalar/Reassociate.cpp
@@ -881,7 +881,11 @@ void Reassociate::RewriteExprTree(BinaryOperator *I,
 /// that computes the negative version of the value specified. The negative
 /// version of the value is returned, and BI is left pointing at the instruction
 /// that should be processed next by the reassociation pass.
-static Value *NegateValue(Value *V, Instruction *BI) {
+/// Also add to the redo list any intermediate instructions modified while
+/// pushing the negates through adds; these will be revisited to see if
+/// additional opportunities have been exposed.
+static Value *NegateValue(Value *V, Instruction *BI,
+                          SetVector<AssertingVH<Instruction>> &ToRedo) {
   if (Constant *C = dyn_cast<Constant>(V)) {
     if (C->getType()->isFPOrFPVectorTy()) {
       return ConstantExpr::getFNeg(C);
@@ -902,8 +906,8 @@
   if (BinaryOperator *I =
           isReassociableOp(V, Instruction::Add, Instruction::FAdd)) {
     // Push the negates through the add.
-    I->setOperand(0, NegateValue(I->getOperand(0), BI));
-    I->setOperand(1, NegateValue(I->getOperand(1), BI));
+    I->setOperand(0, NegateValue(I->getOperand(0), BI, ToRedo));
+    I->setOperand(1, NegateValue(I->getOperand(1), BI, ToRedo));
     if (I->getOpcode() == Instruction::Add) {
       I->setHasNoUnsignedWrap(false);
       I->setHasNoSignedWrap(false);
@@ -916,6 +920,10 @@
     //
     I->moveBefore(BI);
     I->setName(I->getName()+".neg");
+
+    // Add the intermediate negates to the redo list as processing them later
+    // could expose more reassociating opportunities.
+    ToRedo.insert(I);
     return I;
   }
 
@@ -955,12 +963,15 @@
     } else {
       TheNeg->andIRFlags(BI);
     }
+    ToRedo.insert(TheNeg);
     return TheNeg;
   }
 
   // Insert a 'neg' instruction that subtracts the value from zero to get the
   // negation.
-  return CreateNeg(V, V->getName() + ".neg", BI, BI);
+  BinaryOperator *NewNeg = CreateNeg(V, V->getName() + ".neg", BI, BI);
+  ToRedo.insert(NewNeg);
+  return NewNeg;
 }
 
 /// Return true if we should break up this subtract of X-Y into (X + -Y).
@@ -994,14 +1005,15 @@
 
 /// If we have (X-Y), and if either X is an add, or if this is only used by an
 /// add, transform this into (X+(0-Y)) to promote better reassociation.
-static BinaryOperator *BreakUpSubtract(Instruction *Sub) {
+static BinaryOperator *
+BreakUpSubtract(Instruction *Sub, SetVector<AssertingVH<Instruction>> &ToRedo) {
   // Convert a subtract into an add and a neg instruction. This allows sub
   // instructions to be commuted with other add instructions.
   //
   // Calculate the negative value of Operand 1 of the sub instruction,
   // and set it as the RHS of the add instruction we just made.
   //
-  Value *NegVal = NegateValue(Sub->getOperand(1), Sub);
+  Value *NegVal = NegateValue(Sub->getOperand(1), Sub, ToRedo);
   BinaryOperator *New = CreateAdd(Sub->getOperand(0), NegVal, "", Sub, Sub);
   Sub->setOperand(0, Constant::getNullValue(Sub->getType())); // Drop use of op.
   Sub->setOperand(1, Constant::getNullValue(Sub->getType())); // Drop use of op.
@@ -2068,7 +2080,7 @@ void Reassociate::OptimizeInst(Instruction *I) {
   // see if we can convert it to X+-Y.
   if (I->getOpcode() == Instruction::Sub) {
     if (ShouldBreakUpSubtract(I)) {
-      Instruction *NI = BreakUpSubtract(I);
+      Instruction *NI = BreakUpSubtract(I, RedoInsts);
       RedoInsts.insert(I);
       MadeChange = true;
       I = NI;
@@ -2079,6 +2091,12 @@
         (!I->hasOneUse() ||
          !isReassociableOp(I->user_back(), Instruction::Mul))) {
       Instruction *NI = LowerNegateToMultiply(I);
+      // If the negate was simplified, revisit the users to see if we can
+      // reassociate further.
+      for (User *U : NI->users()) {
+        if (BinaryOperator *Tmp = dyn_cast<BinaryOperator>(U))
+          RedoInsts.insert(Tmp);
+      }
       RedoInsts.insert(I);
       MadeChange = true;
       I = NI;
@@ -2086,7 +2104,7 @@
     }
   } else if (I->getOpcode() == Instruction::FSub) {
     if (ShouldBreakUpSubtract(I)) {
-      Instruction *NI = BreakUpSubtract(I);
+      Instruction *NI = BreakUpSubtract(I, RedoInsts);
       RedoInsts.insert(I);
       MadeChange = true;
       I = NI;
@@ -2096,7 +2114,13 @@
     if (isReassociableOp(I->getOperand(1), Instruction::FMul) &&
         (!I->hasOneUse() ||
          !isReassociableOp(I->user_back(), Instruction::FMul))) {
+      // If the negate was simplified, revisit the users to see if we can
+      // reassociate further.
       Instruction *NI = LowerNegateToMultiply(I);
+      for (User *U : NI->users()) {
+        if (BinaryOperator *Tmp = dyn_cast<BinaryOperator>(U))
+          RedoInsts.insert(Tmp);
+      }
       RedoInsts.insert(I);
       MadeChange = true;
       I = NI;
@@ -2111,8 +2135,14 @@
   // If this is an interior node of a reassociable tree, ignore it until we
   // get to the root of the tree, to avoid N^2 analysis.
   unsigned Opcode = BO->getOpcode();
-  if (BO->hasOneUse() && BO->user_back()->getOpcode() == Opcode)
+  if (BO->hasOneUse() && BO->user_back()->getOpcode() == Opcode) {
+    // During the initial run we will get to the root of the tree.
+    // But if we get here while we are redoing instructions, there is no
+    // guarantee that the root will be visited, so redo it later.
+    if (BO->user_back() != BO)
+      RedoInsts.insert(BO->user_back());
     return;
+  }
 
   // If this is an add tree that is used by a sub instruction, ignore it
   // until we process the subtract.
diff --git a/test/Transforms/Reassociate/fast-ReassociateVector.ll b/test/Transforms/Reassociate/fast-ReassociateVector.ll
index 9fbb5ccfe9a..fb76b9d990b 100644
--- a/test/Transforms/Reassociate/fast-ReassociateVector.ll
+++ b/test/Transforms/Reassociate/fast-ReassociateVector.ll
@@ -16,9 +16,9 @@ define <4 x float> @test1(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
 ; Check that a*a*b+a*a*c is turned into a*(a*(b+c)).
 define <2 x float> @test2(<2 x float> %a, <2 x float> %b, <2 x float> %c) {
 ; CHECK-LABEL: @test2
-; CHECK-NEXT: fadd fast <2 x float> %c, %b
-; CHECK-NEXT: fmul fast <2 x float> %a, %tmp2
-; CHECK-NEXT: fmul fast <2 x float> %tmp3, %a
+; CHECK-NEXT: [[TMP1:%tmp.*]] = fadd fast <2 x float> %c, %b
+; CHECK-NEXT: [[TMP2:%tmp.*]] = fmul fast <2 x float> %a, %a
+; CHECK-NEXT: fmul fast <2 x float> [[TMP2]], [[TMP1]]
 ; CHECK-NEXT: ret <2 x float>
 
   %t0 = fmul fast <2 x float> %a, %b
@@ -133,8 +133,8 @@ define <2 x float> @test10(<2 x float> %a, <2 x float> %b, <2 x float> %z) {
 ; Check x*y+y*x -> x*y*2.
 define <2 x double> @test11(<2 x double> %x, <2 x double> %y) {
 ; CHECK-LABEL: @test11
-; CHECK-NEXT: %factor = fmul fast <2 x double> %y, <double 2.000000e+00, double 2.000000e+00>
-; CHECK-NEXT: %tmp1 = fmul fast <2 x double> %factor, %x
+; CHECK-NEXT: %factor = fmul fast <2 x double> %x, <double 2.000000e+00, double 2.000000e+00>
+; CHECK-NEXT: %tmp1 = fmul fast <2 x double> %factor, %y
 ; CHECK-NEXT: ret <2 x double> %tmp1
 
   %1 = fmul fast <2 x double> %x, %y
diff --git a/test/Transforms/Reassociate/fast-basictest.ll b/test/Transforms/Reassociate/fast-basictest.ll
index 64b74e3e8c1..c8a2bd9c193 100644
--- a/test/Transforms/Reassociate/fast-basictest.ll
+++ b/test/Transforms/Reassociate/fast-basictest.ll
@@ -108,7 +108,7 @@ define float @test7(float %A, float %B, float %C) {
 ; CHECK-LABEL: @test7
 ; CHECK-NEXT: fadd fast float %C, %B
 ; CHECK-NEXT: fmul fast float %A, %A
-; CHECK-NEXT: fmul fast float %1, %tmp2
+; CHECK-NEXT: fmul fast float %tmp3, %tmp2
 ; CHECK-NEXT: ret float
 
   %aa = fmul fast float %A, %A
diff --git a/test/Transforms/Reassociate/fast-fp-commute.ll b/test/Transforms/Reassociate/fast-fp-commute.ll
index ad89607a21e..6565bbb3d20 100644
--- a/test/Transforms/Reassociate/fast-fp-commute.ll
+++ b/test/Transforms/Reassociate/fast-fp-commute.ll
@@ -33,8 +33,8 @@ define float @test2(float %x, float %y) {
 
 define float @test3(float %x, float %y) {
 ; CHECK-LABEL: test3
-; CHECK-NEXT: %factor = fmul fast float %y, 2.000000e+00
-; CHECK-NEXT: %tmp1 = fmul fast float %factor, %x
+; CHECK-NEXT: %factor = fmul fast float %x, 2.000000e+00
+; CHECK-NEXT: %tmp1 = fmul fast float %factor, %y
 ; CHECK-NEXT: ret float %tmp1
 
   %1 = fmul fast float %x, %y
diff --git a/test/Transforms/Reassociate/fast-multistep.ll b/test/Transforms/Reassociate/fast-multistep.ll
index 45e15c7f353..aea997cdcbd 100644
--- a/test/Transforms/Reassociate/fast-multistep.ll
+++ b/test/Transforms/Reassociate/fast-multistep.ll
@@ -3,9 +3,9 @@
 define float @fmultistep1(float %a, float %b, float %c) {
 ; Check that a*a*b+a*a*c is turned into a*(a*(b+c)).
 ; CHECK-LABEL: @fmultistep1
-; CHECK-NEXT: fadd fast float %c, %b
-; CHECK-NEXT: fmul fast float %a, %tmp2
-; CHECK-NEXT: fmul fast float %tmp3, %a
+; CHECK-NEXT: [[TMP1:%tmp.*]] = fadd fast float %c, %b
+; CHECK-NEXT: [[TMP2:%tmp.*]] = fmul fast float %a, %a
+; CHECK-NEXT: fmul fast float [[TMP2]], [[TMP1]]
 ; CHECK-NEXT: ret float
 
   %t0 = fmul fast float %a, %b
diff --git a/test/Transforms/Reassociate/multistep.ll b/test/Transforms/Reassociate/multistep.ll
index c499646a8b6..5685bb94953 100644
--- a/test/Transforms/Reassociate/multistep.ll
+++ b/test/Transforms/Reassociate/multistep.ll
@@ -8,9 +8,9 @@ define i64 @multistep1(i64 %a, i64 %b, i64 %c) {
  %t2 = mul i64 %a, %c
  %t3 = mul i64 %a, %t2 ; a*(a*c)
  %t4 = add i64 %t1, %t3
-; CHECK-NEXT: add i64 %c, %b
-; CHECK-NEXT: mul i64 %a, %tmp{{.*}}
-; CHECK-NEXT: mul i64 %tmp{{.*}}, %a
+; CHECK-NEXT: [[TMP1:%tmp.*]] = add i64 %c, %b
+; CHECK-NEXT: [[TMP2:%tmp.*]] = mul i64 %a, %a
+; CHECK-NEXT: mul i64 [[TMP2]], [[TMP1]]
 ; CHECK-NEXT: ret
  ret i64 %t4
 }
diff --git a/test/Transforms/Reassociate/reassoc-intermediate-fnegs.ll b/test/Transforms/Reassociate/reassoc-intermediate-fnegs.ll
new file mode 100644
index 00000000000..c2cdffce61e
--- /dev/null
+++ b/test/Transforms/Reassociate/reassoc-intermediate-fnegs.ll
@@ -0,0 +1,31 @@
+; RUN: opt < %s -reassociate -S | FileCheck %s
+; CHECK-LABEL: faddsubAssoc1
+; CHECK: [[TMP1:%tmp.*]] = fmul fast half %a, 0xH4500
+; CHECK: [[TMP2:%tmp.*]] = fmul fast half %b, 0xH4500
+; CHECK: fsub fast half [[TMP2]], [[TMP1]]
+; CHECK: ret
+; Input is A op (B op C)
+define half @faddsubAssoc1(half %a, half %b) {
+  %tmp1 = fmul fast half %b, 0xH4200 ; 3*b
+  %tmp2 = fmul fast half %a, 0xH4500 ; 5*a
+  %tmp3 = fmul fast half %b, 0xH4000 ; 2*b
+  %tmp4 = fsub fast half %tmp2, %tmp1 ; 5*a - 3*b
+  %tmp5 = fsub fast half %tmp3, %tmp4 ; 2*b - (5*a - 3*b)
+  ret half %tmp5 ; = 5 * (b - a)
+}
+
+; CHECK-LABEL: faddsubAssoc2
+; CHECK: [[TMP1:%tmp.*]] = fmul fast half %a, 0xH4500
+; CHECK: [[TMP2:%tmp.*]] = fmul fast half %b, 0xH3C00
+; CHECK: fadd fast half [[TMP2]], [[TMP1]]
+; CHECK: ret
+; Input is (A op B) op C
+define half @faddsubAssoc2(half %a, half %b) {
+  %tmp1 = fmul fast half %b, 0xH4200 ; 3*b
+  %tmp2 = fmul fast half %a, 0xH4500 ; 5*a
+  %tmp3 = fmul fast half %b, 0xH4000 ; 2*b
+  %tmp4 = fadd fast half %tmp2, %tmp1 ; 5*a + 3*b
+  %tmp5 = fsub fast half %tmp4, %tmp3 ; (5*a + 3*b) - (2*b)
+  ret half %tmp5 ; = 5 * a + b
+}
+
diff --git a/test/Transforms/Reassociate/secondary.ll b/test/Transforms/Reassociate/secondary.ll
index a52000ada53..388cd6bcb6f 100644
--- a/test/Transforms/Reassociate/secondary.ll
+++ b/test/Transforms/Reassociate/secondary.ll
@@ -6,7 +6,7 @@
 
 ; CHECK: define
 ; CHECK-NOT: undef
-; CHECK: %factor = mul i32 %tmp3, -2
+; CHECK: %factor = mul i32 %tmp3.neg, 2
 ; CHECK-NOT: undef
 ; CHECK: }
 
diff --git a/test/Transforms/Reassociate/xor_reassoc.ll b/test/Transforms/Reassociate/xor_reassoc.ll
index a22689805fb..0bed6f35880 100644
--- a/test/Transforms/Reassociate/xor_reassoc.ll
+++ b/test/Transforms/Reassociate/xor_reassoc.ll
@@ -88,8 +88,8 @@ define i32 @xor_special2(i32 %x, i32 %y) {
   %xor1 = xor i32 %xor, %and
   ret i32 %xor1
 ; CHECK-LABEL: @xor_special2(
-; CHECK: %xor = xor i32 %y, 123
-; CHECK: %xor1 = xor i32 %xor, %x
+; CHECK: %xor = xor i32 %x, 123
+; CHECK: %xor1 = xor i32 %xor, %y
 ; CHECK: ret i32 %xor1
 }
 
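Note: the redo-list behavior is easiest to see on a scalar analog of the new half-precision test. The reproducer below is a sketch, not part of the patch; the function name and float constants are illustrative, and the CHECK lines mirror what faddsubAssoc1 expects for half. Without the ToRedo plumbing, BreakUpSubtract rewrites the outer fsub into an fadd of freshly created negates, but those negates were never revisited, so the two multiplies of %b stayed separate; with the patch they land on RedoInsts and are folded on the second pass.

; RUN: opt < %s -reassociate -S | FileCheck %s
; Computes 2*b - (5*a - 3*b), which should reassociate to 5*b - 5*a.
; CHECK-LABEL: @redo_intermediate_negates(
; CHECK: fmul fast float %a, 5.000000e+00
; CHECK: fmul fast float %b, 5.000000e+00
; CHECK: fsub fast float
define float @redo_intermediate_negates(float %a, float %b) {
  %t1 = fmul fast float %b, 3.000000e+00 ; 3*b
  %t2 = fmul fast float %a, 5.000000e+00 ; 5*a
  %t3 = fmul fast float %b, 2.000000e+00 ; 2*b
  %t4 = fsub fast float %t2, %t1         ; 5*a - 3*b
  %t5 = fsub fast float %t3, %t4         ; 2*b - (5*a - 3*b)
  ret float %t5                          ; = 5 * (b - a)
}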