From 357a71a8557165da4606283bf3d3949264f391ec Mon Sep 17 00:00:00 2001 From: James Molloy Date: Mon, 15 Aug 2016 08:04:56 +0000 Subject: [PATCH] [SimplifyCFG] Rewrite SinkThenElseCodeToEnd The new version has several advantages: 1) IMSHO it's more readable and neater 2) It handles loads and stores properly 3) It can handle any number of incoming blocks rather than just two. I'll be taking advantage of this in a followup patch. With this change we can now finally sink load-modify-store idioms such as: if (a) return *b += 3; else return *b += 4; => %z = load i32, i32* %y %.sink = select i1 %a, i32 5, i32 7 %b = add i32 %z, %.sink store i32 %b, i32* %y ret i32 %b When this works for switches it'll be even more powerful. llvm-svn: 278660 --- lib/Transforms/Utils/SimplifyCFG.cpp | 367 ++++++++++-------- test/CodeGen/ARM/avoid-cpsr-rmw.ll | 2 +- .../single-constant-use-preserves-dbgloc.ll | 3 +- .../SimplifyCFG/AArch64/prefer-fma.ll | 3 +- .../SimplifyCFG/sink-common-code.ll | 177 +++++++++ 5 files changed, 394 insertions(+), 158 deletions(-) diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index e88d7ebaf6f..774a400bfb5 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -1319,172 +1319,229 @@ HoistTerminator: return true; } +// Return true if V0 and V1 are equivalent. This handles the obvious cases +// where V0 == V1 and V0 and V1 are both identical instructions, but also +// handles loads and stores with identical operands. +// +// Because determining if two memory instructions are equivalent +// depends on control flow, the \c At0 and \c At1 parameters specify a +// location for the query. This function is essentially answering the +// query "If V0 were moved to At0, and V1 were moved to At1, are V0 and V1 +// equivalent?". In practice this means checking that moving V0 to At0 +// doesn't cross any other memory instructions. +static bool areValuesTriviallySame(Value *V0, BasicBlock::const_iterator At0, + Value *V1, BasicBlock::const_iterator At1) { + if (V0 == V1) + return true; + + // Also check for instructions that are identical but not pointer-identical. + // This can include load instructions that haven't been CSE'd. + if (!isa(V0) || !isa(V1)) + return false; + const auto *I0 = cast(V0); + const auto *I1 = cast(V1); + if (!I0->isIdenticalToWhenDefined(I1)) + return false; + + if (!I0->mayReadOrWriteMemory()) + return true; + + // Instructions that may read or write memory have extra restrictions. We + // must ensure we don't treat %a and %b as equivalent in code such as: + // + // %a = load %x + // store %x, 1 + // if (%c) { + // %b = load %x + // %d = add %b, 1 + // } else { + // %d = add %a, 1 + // } + + // Be conservative. We don't want to search the entire CFG between def + // and use; if the def isn't in the same block as the use just bail. + if (I0->getParent() != At0->getParent() || + I1->getParent() != At1->getParent()) + return false; + + // Again, be super conservative. Ideally we'd be able to query AliasAnalysis + // but we currently don't have that available. + auto WritesMemory = [](const Instruction &I) { + return I.mayReadOrWriteMemory(); + }; + if (std::any_of(std::next(I0->getIterator()), At0, WritesMemory)) + return false; + if (std::any_of(std::next(I1->getIterator()), At1, WritesMemory)) + return false; + return true; +} + +// Is it legal to replace the operand \c OpIdx of \c GEP with a PHI node? +static bool canReplaceGEPOperandWithPHI(const Instruction *GEP, + unsigned OpIdx) { + if (OpIdx == 0) + return true; + gep_type_iterator It = std::next(gep_type_begin(GEP), OpIdx - 1); + return !It->isStructTy(); +} + +// All blocks in Blocks unconditionally jump to a common successor. Analyze +// the last non-terminator instruction in each block and return true if it would +// be possible to sink them into their successor, creating one common +// instruction instead. Set NumPHIsRequired to the number of PHI nodes that +// would need to be created during sinking. +static bool canSinkLastInstruction(ArrayRef Blocks, + unsigned &NumPHIsRequired) { + SmallVector Insts; + for (auto *BB : Blocks) { + if (BB->getTerminator() == &BB->front()) + // Block was empty. + return false; + Insts.push_back(BB->getTerminator()->getPrevNode()); + } + + // Prune out obviously bad instructions to move. Any non-store instruction + // must have exactly one use, and we check later that use is by a single, + // common PHI instruction in the successor. + for (auto *I : Insts) { + // These instructions may change or break semantics if moved. + if (isa(I) || I->isEHPad() || isa(I) || + I->getType()->isTokenTy()) + return false; + // Apart from loads and stores, we won't move anything that could + // change memory or have sideeffects. + if (!isa(I) && !isa(I) && + (I->mayHaveSideEffects() || I->mayHaveSideEffects())) + return false; + // Everything must have only one use too, apart from stores which + // have no uses. + if (!isa(I) && !I->hasOneUse()) + return false; + } + + const Instruction *I0 = Insts.front(); + for (auto *I : Insts) + if (!I->isSameOperationAs(I0)) + return false; + + // If this isn't a store, check the only user is a single PHI. + if (!isa(I0)) { + auto *PNUse = dyn_cast(*I0->user_begin()); + if (!PNUse || + !all_of(Insts, [&PNUse](const Instruction *I) { + return *I->user_begin() == PNUse; + })) + return false; + } + + NumPHIsRequired = 0; + for (unsigned OI = 0, OE = I0->getNumOperands(); OI != OE; ++OI) { + if (I0->getOperand(OI)->getType()->isTokenTy()) + // Don't touch any operand of token type. + return false; + auto SameAsI0 = [&I0, OI](const Instruction *I) { + return areValuesTriviallySame(I->getOperand(OI), I->getIterator(), + I0->getOperand(OI), I0->getIterator()); + }; + if (!all_of(Insts, SameAsI0)) { + if (isa(I0) && !canReplaceGEPOperandWithPHI(I0, OI)) + // We can't create a PHI from this GEP. + return false; + ++NumPHIsRequired; + } + } + return true; +} + +// Assuming canSinkLastInstruction(Blocks) has returned true, sink the last +// instruction of every block in Blocks to their common successor, commoning +// into one instruction. +static void sinkLastInstruction(ArrayRef Blocks) { + unsigned Dummy; + (void)Dummy; + assert(canSinkLastInstruction(Blocks, Dummy) && + "Must analyze before transforming!"); + auto *BBEnd = Blocks[0]->getTerminator()->getSuccessor(0); + + // canSinkLastInstruction returning true guarantees that every block has at + // least one non-terminator instruction. + SmallVector Insts; + for (auto *BB : Blocks) + Insts.push_back(BB->getTerminator()->getPrevNode()); + + // We don't need to do any checking here; canSinkLastInstruction should have + // done it all for us. + Instruction *I0 = Insts.front(); + SmallVector NewOperands; + for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) { + // This check is different to that in canSinkLastInstruction. There, we + // cared about the global view once simplifycfg (and instcombine) have + // completed - it takes into account PHIs that become trivially + // simplifiable. However here we need a more local view; if an operand + // differs we create a PHI and rely on instcombine to clean up the very + // small mess we may make. + bool NeedPHI = any_of(Insts, [&I0, O](const Instruction *I) { + return I->getOperand(O) != I0->getOperand(O); + }); + if (!NeedPHI) { + NewOperands.push_back(I0->getOperand(O)); + continue; + } + + // Create a new PHI in the successor block and populate it. + auto *Op = I0->getOperand(O); + assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!"); + auto *PN = PHINode::Create(Op->getType(), Insts.size(), + Op->getName() + ".sink", &BBEnd->front()); + for (auto *I : Insts) + PN->addIncoming(I->getOperand(O), I->getParent()); + NewOperands.push_back(PN); + } + + // Arbitrarily use I0 as the new "common" instruction; remap its operands + // and move it to the start of the successor block. + for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) + I0->getOperandUse(O).set(NewOperands[O]); + I0->moveBefore(&*BBEnd->getFirstInsertionPt()); + + if (!isa(I0)) { + // canSinkLastInstruction checked that all instructions were used by + // one and only one PHI node. Find that now, RAUW it to our common + // instruction and nuke it. + assert(I0->hasOneUse()); + auto *PN = cast(*I0->user_begin()); + PN->replaceAllUsesWith(I0); + PN->eraseFromParent(); + } + + // Finally nuke all instructions apart from the common instruction. + for (auto *I : Insts) + if (I != I0) + I->eraseFromParent(); +} + /// Given an unconditional branch that goes to BBEnd, /// check whether BBEnd has only two predecessors and the other predecessor /// ends with an unconditional branch. If it is true, sink any common code /// in the two predecessors to BBEnd. static bool SinkThenElseCodeToEnd(BranchInst *BI1) { assert(BI1->isUnconditional()); - BasicBlock *BB1 = BI1->getParent(); BasicBlock *BBEnd = BI1->getSuccessor(0); - // Check that BBEnd has two predecessors and the other predecessor ends with - // an unconditional branch. - pred_iterator PI = pred_begin(BBEnd), PE = pred_end(BBEnd); - BasicBlock *Pred0 = *PI++; - if (PI == PE) // Only one predecessor. + SmallVector Blocks; + for (auto *BB : predecessors(BBEnd)) + Blocks.push_back(BB); + if (Blocks.size() != 2 || + !all_of(Blocks, [](const BasicBlock *BB) { + auto *BI = dyn_cast(BB->getTerminator()); + return BI && BI->isUnconditional(); + })) return false; - BasicBlock *Pred1 = *PI++; - if (PI != PE) // More than two predecessors. - return false; - BasicBlock *BB2 = (Pred0 == BB1) ? Pred1 : Pred0; - BranchInst *BI2 = dyn_cast(BB2->getTerminator()); - if (!BI2 || !BI2->isUnconditional()) - return false; - - // Gather the PHI nodes in BBEnd. - SmallDenseMap, PHINode *> JointValueMap; - Instruction *FirstNonPhiInBBEnd = nullptr; - for (BasicBlock::iterator I = BBEnd->begin(), E = BBEnd->end(); I != E; ++I) { - if (PHINode *PN = dyn_cast(I)) { - Value *BB1V = PN->getIncomingValueForBlock(BB1); - Value *BB2V = PN->getIncomingValueForBlock(BB2); - JointValueMap[std::make_pair(BB1V, BB2V)] = PN; - } else { - FirstNonPhiInBBEnd = &*I; - break; - } - } - if (!FirstNonPhiInBBEnd) - return false; - - // This does very trivial matching, with limited scanning, to find identical - // instructions in the two blocks. We scan backward for obviously identical - // instructions in an identical order. - BasicBlock::InstListType::reverse_iterator RI1 = BB1->getInstList().rbegin(), - RE1 = BB1->getInstList().rend(), - RI2 = BB2->getInstList().rbegin(), - RE2 = BB2->getInstList().rend(); - // Skip debug info. - while (RI1 != RE1 && isa(&*RI1)) - ++RI1; - if (RI1 == RE1) - return false; - while (RI2 != RE2 && isa(&*RI2)) - ++RI2; - if (RI2 == RE2) - return false; - // Skip the unconditional branches. - ++RI1; - ++RI2; bool Changed = false; - while (RI1 != RE1 && RI2 != RE2) { - // Skip debug info. - while (RI1 != RE1 && isa(&*RI1)) - ++RI1; - if (RI1 == RE1) - return Changed; - while (RI2 != RE2 && isa(&*RI2)) - ++RI2; - if (RI2 == RE2) - return Changed; - - Instruction *I1 = &*RI1, *I2 = &*RI2; - auto InstPair = std::make_pair(I1, I2); - // I1 and I2 should have a single use in the same PHI node, and they - // perform the same operation. - // Cannot move control-flow-involving, volatile loads, vaarg, etc. - if (isa(I1) || isa(I2) || isa(I1) || - isa(I2) || I1->isEHPad() || I2->isEHPad() || - isa(I1) || isa(I2) || - I1->mayHaveSideEffects() || I2->mayHaveSideEffects() || - I1->mayReadOrWriteMemory() || I2->mayReadOrWriteMemory() || - !I1->hasOneUse() || !I2->hasOneUse() || !JointValueMap.count(InstPair)) - return Changed; - - // Check whether we should swap the operands of ICmpInst. - // TODO: Add support of communativity. - ICmpInst *ICmp1 = dyn_cast(I1), *ICmp2 = dyn_cast(I2); - bool SwapOpnds = false; - if (ICmp1 && ICmp2 && ICmp1->getOperand(0) != ICmp2->getOperand(0) && - ICmp1->getOperand(1) != ICmp2->getOperand(1) && - (ICmp1->getOperand(0) == ICmp2->getOperand(1) || - ICmp1->getOperand(1) == ICmp2->getOperand(0))) { - ICmp2->swapOperands(); - SwapOpnds = true; - } - if (!I1->isSameOperationAs(I2)) { - if (SwapOpnds) - ICmp2->swapOperands(); - return Changed; - } - - // The operands should be either the same or they need to be generated - // with a PHI node after sinking. We only handle the case where there is - // a single pair of different operands. - Value *DifferentOp1 = nullptr, *DifferentOp2 = nullptr; - unsigned Op1Idx = ~0U; - for (unsigned I = 0, E = I1->getNumOperands(); I != E; ++I) { - if (I1->getOperand(I) == I2->getOperand(I)) - continue; - // Early exit if we have more-than one pair of different operands or if - // we need a PHI node to replace a constant. - if (Op1Idx != ~0U || isa(I1->getOperand(I)) || - isa(I2->getOperand(I))) { - // If we can't sink the instructions, undo the swapping. - if (SwapOpnds) - ICmp2->swapOperands(); - return Changed; - } - DifferentOp1 = I1->getOperand(I); - Op1Idx = I; - DifferentOp2 = I2->getOperand(I); - } - - DEBUG(dbgs() << "SINK common instructions " << *I1 << "\n"); - DEBUG(dbgs() << " " << *I2 << "\n"); - - // We insert the pair of different operands to JointValueMap and - // remove (I1, I2) from JointValueMap. - if (Op1Idx != ~0U) { - auto &NewPN = JointValueMap[std::make_pair(DifferentOp1, DifferentOp2)]; - if (!NewPN) { - NewPN = - PHINode::Create(DifferentOp1->getType(), 2, - DifferentOp1->getName() + ".sink", &BBEnd->front()); - NewPN->addIncoming(DifferentOp1, BB1); - NewPN->addIncoming(DifferentOp2, BB2); - DEBUG(dbgs() << "Create PHI node " << *NewPN << "\n";); - } - // I1 should use NewPN instead of DifferentOp1. - I1->setOperand(Op1Idx, NewPN); - } - PHINode *OldPN = JointValueMap[InstPair]; - JointValueMap.erase(InstPair); - - // We need to update RE1 and RE2 if we are going to sink the first - // instruction in the basic block down. - bool UpdateRE1 = (I1 == &BB1->front()), UpdateRE2 = (I2 == &BB2->front()); - // Sink the instruction. - BBEnd->getInstList().splice(FirstNonPhiInBBEnd->getIterator(), - BB1->getInstList(), I1); - if (!OldPN->use_empty()) - OldPN->replaceAllUsesWith(I1); - OldPN->eraseFromParent(); - - if (!I2->use_empty()) - I2->replaceAllUsesWith(I1); - I1->intersectOptionalDataWith(I2); - // TODO: Use combineMetadata here to preserve what metadata we can - // (analogous to the hoisting case above). - I2->eraseFromParent(); - - if (UpdateRE1) - RE1 = BB1->getInstList().rend(); - if (UpdateRE2) - RE2 = BB2->getInstList().rend(); - FirstNonPhiInBBEnd = &*I1; + unsigned NumPHIsToInsert; + while (canSinkLastInstruction(Blocks, NumPHIsToInsert) && NumPHIsToInsert <= 1) { + sinkLastInstruction(Blocks); NumSinkCommons++; Changed = true; } diff --git a/test/CodeGen/ARM/avoid-cpsr-rmw.ll b/test/CodeGen/ARM/avoid-cpsr-rmw.ll index 79e8e68e2f5..78d3ebf371a 100644 --- a/test/CodeGen/ARM/avoid-cpsr-rmw.ll +++ b/test/CodeGen/ARM/avoid-cpsr-rmw.ll @@ -106,7 +106,7 @@ if.then: if.else: store i32 3, i32* %p, align 4 - %incdec.ptr5 = getelementptr inbounds i32, i32* %p, i32 2 + %incdec.ptr5 = getelementptr inbounds i32, i32* %p, i32 3 store i32 5, i32* %incdec.ptr1, align 4 store i32 6, i32* %incdec.ptr5, align 4 br label %if.end diff --git a/test/DebugInfo/ARM/single-constant-use-preserves-dbgloc.ll b/test/DebugInfo/ARM/single-constant-use-preserves-dbgloc.ll index a992ce3bf85..e22614388b8 100644 --- a/test/DebugInfo/ARM/single-constant-use-preserves-dbgloc.ll +++ b/test/DebugInfo/ARM/single-constant-use-preserves-dbgloc.ll @@ -9,8 +9,9 @@ ; return -1; ; } +; CHECK: mvnlt ; CHECK: .loc 1 6 7 -; CHECK: mvn +; CHECK: strlt target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" target triple = "armv7--linux-gnueabihf" diff --git a/test/Transforms/SimplifyCFG/AArch64/prefer-fma.ll b/test/Transforms/SimplifyCFG/AArch64/prefer-fma.ll index cfbe219c4c2..7144da2d4bc 100644 --- a/test/Transforms/SimplifyCFG/AArch64/prefer-fma.ll +++ b/test/Transforms/SimplifyCFG/AArch64/prefer-fma.ll @@ -29,7 +29,8 @@ if.else: ; preds = %entry %4 = load double, double* %a, align 8 %mul1 = fmul fast double %1, %4 %sub1 = fsub fast double %mul1, %0 - store double %sub1, double* %y, align 8 + %gep1 = getelementptr double, double* %y, i32 1 + store double %sub1, double* %gep1, align 8 br label %if.end if.end: ; preds = %if.else, %if.then diff --git a/test/Transforms/SimplifyCFG/sink-common-code.ll b/test/Transforms/SimplifyCFG/sink-common-code.ll index cdb6ed29d85..4277ae0e33a 100644 --- a/test/Transforms/SimplifyCFG/sink-common-code.ll +++ b/test/Transforms/SimplifyCFG/sink-common-code.ll @@ -81,3 +81,180 @@ if.end: ; CHECK: call ; CHECK: add ; CHECK-NOT: br + +define i32 @test4(i1 zeroext %flag, i32 %x, i32* %y) { +entry: + br i1 %flag, label %if.then, label %if.else + +if.then: + %a = add i32 %x, 5 + store i32 %a, i32* %y + br label %if.end + +if.else: + %b = add i32 %x, 7 + store i32 %b, i32* %y + br label %if.end + +if.end: + ret i32 1 +} + +; CHECK-LABEL: test4 +; CHECK: select +; CHECK: store +; CHECK-NOT: store + +define i32 @test5(i1 zeroext %flag, i32 %x, i32* %y) { +entry: + br i1 %flag, label %if.then, label %if.else + +if.then: + %a = add i32 %x, 5 + store volatile i32 %a, i32* %y + br label %if.end + +if.else: + %b = add i32 %x, 7 + store i32 %b, i32* %y + br label %if.end + +if.end: + ret i32 1 +} + +; CHECK-LABEL: test5 +; CHECK: store volatile +; CHECK: store + +define i32 @test6(i1 zeroext %flag, i32 %x, i32* %y) { +entry: + br i1 %flag, label %if.then, label %if.else + +if.then: + %a = add i32 %x, 5 + store volatile i32 %a, i32* %y + br label %if.end + +if.else: + %b = add i32 %x, 7 + store volatile i32 %b, i32* %y + br label %if.end + +if.end: + ret i32 1 +} + +; CHECK-LABEL: test6 +; CHECK: select +; CHECK: store volatile +; CHECK-NOT: store + +define i32 @test7(i1 zeroext %flag, i32 %x, i32* %y) { +entry: + br i1 %flag, label %if.then, label %if.else + +if.then: + %z = load volatile i32, i32* %y + %a = add i32 %z, 5 + store volatile i32 %a, i32* %y + br label %if.end + +if.else: + %w = load volatile i32, i32* %y + %b = add i32 %w, 7 + store volatile i32 %b, i32* %y + br label %if.end + +if.end: + ret i32 1 +} + +; CHECK-LABEL: test7 +; CHECK-DAG: select +; CHECK-DAG: load volatile +; CHECK: store volatile +; CHECK-NOT: load +; CHECK-NOT: store + +; %z and %w are in different blocks. We shouldn't sink the add because +; there may be intervening memory instructions. +define i32 @test8(i1 zeroext %flag, i32 %x, i32* %y) { +entry: + %z = load volatile i32, i32* %y + br i1 %flag, label %if.then, label %if.else + +if.then: + %a = add i32 %z, 5 + store volatile i32 %a, i32* %y + br label %if.end + +if.else: + %w = load volatile i32, i32* %y + %b = add i32 %w, 7 + store volatile i32 %b, i32* %y + br label %if.end + +if.end: + ret i32 1 +} + +; CHECK-LABEL: test8 +; CHECK: add +; CHECK: add + +; The extra store in %if.then means %z and %w are not equivalent. +define i32 @test9(i1 zeroext %flag, i32 %x, i32* %y, i32* %p) { +entry: + br i1 %flag, label %if.then, label %if.else + +if.then: + store i32 7, i32* %p + %z = load volatile i32, i32* %y + store i32 6, i32* %p + %a = add i32 %z, 5 + store volatile i32 %a, i32* %y + br label %if.end + +if.else: + %w = load volatile i32, i32* %y + %b = add i32 %w, 7 + store volatile i32 %b, i32* %y + br label %if.end + +if.end: + ret i32 1 +} + +; CHECK-LABEL: test9 +; CHECK: add +; CHECK: add + +%struct.anon = type { i32, i32 } + +; The GEP indexes a struct type so cannot have a variable last index. +define i32 @test10(i1 zeroext %flag, i32 %x, i32* %y, %struct.anon* %s) { +entry: + br i1 %flag, label %if.then, label %if.else + +if.then: + %dummy = add i32 %x, 5 + %gepa = getelementptr inbounds %struct.anon, %struct.anon* %s, i32 0, i32 0 + store volatile i32 %x, i32* %gepa + br label %if.end + +if.else: + %dummy1 = add i32 %x, 6 + %gepb = getelementptr inbounds %struct.anon, %struct.anon* %s, i32 0, i32 1 + store volatile i32 %x, i32* %gepb + br label %if.end + +if.end: + ret i32 1 +} + +; CHECK-LABEL: test10 +; CHECK: getelementptr +; CHECK: getelementptr +; CHECK: phi +; CHECK: store volatile