diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index 60302f77947..d4c953514ce 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -874,7 +874,7 @@ public:
 /// to:
 ///   %V1 = load i32* %Alloca      -> will be mem2reg'd
 ///   %V2 = load i32* %Other
-///   %V = select i1 %cond, i32 %V1, i32* %V2
+///   %V = select i1 %cond, i32 %V1, i32 %V2
 ///
 /// We can do this to a select if its only uses are loads and if the operand to
 /// the select can be loaded unconditionally.
@@ -887,7 +887,7 @@ static bool isSafeSelectToSpeculate(SelectInst *SI, const TargetData *TD) {
     LoadInst *LI = dyn_cast<LoadInst>(*UI);
     if (LI == 0 || LI->isVolatile()) return false;
 
-    // Both operands to the select need to be deferencable, either absolutely
+    // Both operands to the select need to be dereferenceable, either absolutely
     // (e.g. allocas) or at this point because we can see other accesses to it.
     if (!TDerefable && !isSafeToLoadUnconditionally(SI->getTrueValue(), LI,
                                                     LI->getAlignment(), TD))
@@ -900,6 +900,77 @@ static bool isSafeSelectToSpeculate(SelectInst *SI, const TargetData *TD) {
   return true;
 }
 
+/// isSafePHIToSpeculate - PHI instructions that use an alloca and are
+/// subsequently loaded can be rewritten to load both input pointers in the pred
+/// blocks and then PHI the results, allowing the load of the alloca to be
+/// promoted.
+/// From this:
+///   %P2 = phi [i32* %Alloca, i32* %Other]
+///   %V = load i32* %P2
+/// to:
+///   %V1 = load i32* %Alloca      -> will be mem2reg'd
+///   ...
+///   %V2 = load i32* %Other
+///   ...
+///   %V = phi [i32 %V1, i32 %V2]
+///
+/// We can do this to a phi if its only uses are loads and if the operands to
+/// the phi can be loaded unconditionally.
+static bool isSafePHIToSpeculate(PHINode *PN, const TargetData *TD) {
+  // For now, we can only do this promotion if the load is in the same block as
+  // the PHI, and if there are no stores between the phi and load.
+  // TODO: Allow recursive phi users.
+  // TODO: Allow stores.
+  BasicBlock *BB = PN->getParent();
+  unsigned MaxAlign = 0;
+  for (Value::use_iterator UI = PN->use_begin(), UE = PN->use_end();
+       UI != UE; ++UI) {
+    LoadInst *LI = dyn_cast<LoadInst>(*UI);
+    if (LI == 0 || LI->isVolatile()) return false;
+
+    // For now we only allow loads in the same block as the PHI.  This is a
+    // common case that happens when instcombine merges two loads through a PHI.
+    if (LI->getParent() != BB) return false;
+
+    // Ensure that there are no instructions between the PHI and the load that
+    // could store.
+    for (BasicBlock::iterator BBI = PN; &*BBI != LI; ++BBI)
+      if (BBI->mayWriteToMemory())
+        return false;
+
+    MaxAlign = std::max(MaxAlign, LI->getAlignment());
+  }
+
+  // Okay, we know that we have one or more loads in the same block as the PHI.
+  // We can transform this if it is safe to push the loads into the predecessor
+  // blocks.  The only thing to watch out for is that we can't put a possibly
+  // trapping load in the predecessor if it is a critical edge.
+  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+    BasicBlock *Pred = PN->getIncomingBlock(i);
+
+    // If the predecessor has a single successor, then the edge isn't critical.
+    if (Pred->getTerminator()->getNumSuccessors() == 1)
+      continue;
+
+    Value *InVal = PN->getIncomingValue(i);
+
+    // If the InVal is an invoke in the pred, we can't put a load on the edge.
+    if (InvokeInst *II = dyn_cast<InvokeInst>(InVal))
+      if (II->getParent() == Pred)
+        return false;
+
+    // If this pointer is always safe to load, or if we can prove that there is
+    // already a load in the block, then we can move the load to the pred block.
+    if (InVal->isDereferenceablePointer() ||
+        isSafeToLoadUnconditionally(InVal, Pred->getTerminator(), MaxAlign, TD))
+      continue;
+
+    return false;
+  }
+
+  return true;
+}
+
 
 /// tryToMakeAllocaBePromotable - This returns true if the alloca only has
 /// direct (non-volatile) loads and stores to it.  If the alloca is close but
@@ -946,6 +1017,21 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) {
       continue;
     }
 
+    if (PHINode *PN = dyn_cast<PHINode>(U)) {
+      if (PN->use_empty()) {  // Dead PHIs can be stripped.
+        InstsToRewrite.insert(PN);
+        continue;
+      }
+
+      // If it is safe to turn "load (phi [AI, ptr, ...])" into a PHI of loads
+      // in the pred blocks, then we can transform this by rewriting the PHI.
+      if (!isSafePHIToSpeculate(PN, TD))
+        return false;
+
+      InstsToRewrite.insert(PN);
+      continue;
+    }
+
     return false;
   }
 
@@ -957,35 +1043,80 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) {
   // If we have instructions that need to be rewritten for this to be promotable
   // take care of it now.
   for (unsigned i = 0, e = InstsToRewrite.size(); i != e; ++i) {
-    // Selects in InstsToRewrite only have load uses.  Rewrite each as two
-    // loads with a new select.
-    SelectInst *SI = cast<SelectInst>(InstsToRewrite[i]);
-
-    for (Value::use_iterator UI = SI->use_begin(), E = SI->use_end(); UI != E;){
-      LoadInst *LI = cast<LoadInst>(*UI++);
+    if (SelectInst *SI = dyn_cast<SelectInst>(InstsToRewrite[i])) {
+      // Selects in InstsToRewrite only have load uses.  Rewrite each as two
+      // loads with a new select.
+      while (!SI->use_empty()) {
+        LoadInst *LI = cast<LoadInst>(SI->use_back());
 
-      IRBuilder<> Builder(LI);
-      LoadInst *TrueLoad =
-        Builder.CreateLoad(SI->getTrueValue(), LI->getName()+".t");
-      LoadInst *FalseLoad =
-        Builder.CreateLoad(SI->getFalseValue(), LI->getName()+".t");
-
-      // Transfer alignment and TBAA info if present.
-      TrueLoad->setAlignment(LI->getAlignment());
-      FalseLoad->setAlignment(LI->getAlignment());
-      if (MDNode *Tag = LI->getMetadata(LLVMContext::MD_tbaa)) {
-        TrueLoad->setMetadata(LLVMContext::MD_tbaa, Tag);
-        FalseLoad->setMetadata(LLVMContext::MD_tbaa, Tag);
+        IRBuilder<> Builder(LI);
+        LoadInst *TrueLoad =
+          Builder.CreateLoad(SI->getTrueValue(), LI->getName()+".t");
+        LoadInst *FalseLoad =
+          Builder.CreateLoad(SI->getFalseValue(), LI->getName()+".t");
+
+        // Transfer alignment and TBAA info if present.
+        TrueLoad->setAlignment(LI->getAlignment());
+        FalseLoad->setAlignment(LI->getAlignment());
+        if (MDNode *Tag = LI->getMetadata(LLVMContext::MD_tbaa)) {
+          TrueLoad->setMetadata(LLVMContext::MD_tbaa, Tag);
+          FalseLoad->setMetadata(LLVMContext::MD_tbaa, Tag);
+        }
+
+        Value *V = Builder.CreateSelect(SI->getCondition(), TrueLoad, FalseLoad);
+        V->takeName(LI);
+        LI->replaceAllUsesWith(V);
+        LI->eraseFromParent();
       }
-
-      Value *V = Builder.CreateSelect(SI->getCondition(), TrueLoad, FalseLoad);
-      V->takeName(LI);
-      LI->replaceAllUsesWith(V);
+
+      // Now that all the loads are gone, the select is gone too.
+      SI->eraseFromParent();
+      continue;
+    }
+
+    // Otherwise, we have a PHI node which allows us to push the loads into the
+    // predecessors.
+    PHINode *PN = cast<PHINode>(InstsToRewrite[i]);
+    if (PN->use_empty()) {
+      PN->eraseFromParent();
+      continue;
+    }
+
+    const Type *LoadTy = cast<PointerType>(PN->getType())->getElementType();
+    PHINode *NewPN = PHINode::Create(LoadTy, PN->getName()+".ld", PN);
+
+    // Get the TBAA tag and alignment to use from one of the loads.  It doesn't
+    // matter which one we get and if any differ, it doesn't matter.
+    LoadInst *SomeLoad = cast<LoadInst>(PN->use_back());
+    MDNode *TBAATag = SomeLoad->getMetadata(LLVMContext::MD_tbaa);
+    unsigned Align = SomeLoad->getAlignment();
+
+    // Rewrite all loads of the PN to use the new PHI.
+    while (!PN->use_empty()) {
+      LoadInst *LI = cast<LoadInst>(PN->use_back());
+      LI->replaceAllUsesWith(NewPN);
       LI->eraseFromParent();
     }
 
-    // Now that all the loads are gone, the select is gone too.
-    SI->eraseFromParent();
+    // Inject loads into all of the pred blocks.  Keep track of which blocks we
+    // insert them into in case we have multiple edges from the same block.
+    DenseMap<BasicBlock*, LoadInst*> InsertedLoads;
+
+    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+      BasicBlock *Pred = PN->getIncomingBlock(i);
+      LoadInst *&Load = InsertedLoads[Pred];
+      if (Load == 0) {
+        Load = new LoadInst(PN->getIncomingValue(i),
+                            PN->getName() + "." + Pred->getName(),
+                            Pred->getTerminator());
+        Load->setAlignment(Align);
+        if (TBAATag) Load->setMetadata(LLVMContext::MD_tbaa, TBAATag);
+      }
+
+      NewPN->addIncoming(Load, Pred);
+    }
+
+    PN->eraseFromParent();
   }
 
   ++NumAdjusted;
diff --git a/test/Transforms/ScalarRepl/phi-select.ll b/test/Transforms/ScalarRepl/phi-select.ll
index f612ed55a2d..fa3972de90a 100644
--- a/test/Transforms/ScalarRepl/phi-select.ll
+++ b/test/Transforms/ScalarRepl/phi-select.ll
@@ -25,8 +25,8 @@ entry:
 }
 
 ; CHECK: @test2
-; CHECK: %A.0 = alloca i32
-; CHECK: %A.1 = alloca i32
+; CHECK: %X.ld = phi i32 [ 1, %entry ], [ 2, %T ]
+; CHECK-NEXT: ret i32 %X.ld
 define i32 @test2(i1 %c) {
 entry:
   %A = alloca {i32, i32}
@@ -35,7 +35,7 @@ entry:
   br i1 %c, label %T, label %F
 T:
   %C = getelementptr {i32, i32}* %A, i32 0, i32 1
-  store i32 2, i32* %B
+  store i32 2, i32* %C
   br label %F
 F:
   %X = phi i32* [%B, %entry], [%C, %T]
@@ -130,3 +130,24 @@ define i32 @test7(i32 %x, i1 %c) nounwind readnone ssp {
 ; CHECK: ret i32 %r
 }
 
+;; Promote allocas that are PHI'd together by moving the loads.
+define i32 @test8(i32 %x) nounwind readnone ssp {
+; CHECK: @test8
+; CHECK-NOT: load i32
+; CHECK-NOT: store i32
+; CHECK: %p.0.ld = phi i32 [ 2, %entry ], [ 1, %T ]
+; CHECK-NEXT: ret i32 %p.0.ld
+entry:
+  %a = alloca i32, align 8
+  %b = alloca i32, align 8
+  store i32 1, i32* %a, align 8
+  store i32 2, i32* %b, align 8
+  %c = icmp eq i32 %x, 0
+  br i1 %c, label %T, label %Cont
+T:
+  br label %Cont
+Cont:
+  %p.0 = phi i32* [%b, %entry], [%a, %T]
+  %r = load i32* %p.0, align 8
+  ret i32 %r
+}
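
For reference, here is a sketch of the IR that @test8 is expected to reduce to once the new PHI handling (plus the mem2reg promotion scalarrepl already performs) has run, i.e. roughly what the two positive CHECK lines above are matching. It is reconstructed by hand from the test and the doc comment on isSafePHIToSpeculate, not copied from actual pass output, so anything not pinned down by the CHECK/CHECK-NOT lines (instruction ordering, surviving attributes) should be treated as an assumption.

define i32 @test8(i32 %x) nounwind readnone ssp {
entry:
  %c = icmp eq i32 %x, 0
  br i1 %c, label %T, label %Cont
T:
  br label %Cont
Cont:
  ; The load of %p.0 was pushed into each predecessor, where mem2reg folds it
  ; to the stored constant, so the pointer PHI becomes a PHI of plain i32s.
  %p.0.ld = phi i32 [ 2, %entry ], [ 1, %T ]
  ret i32 %p.0.ld
}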