diff --git a/lib/CodeGen/CodePlacementOpt.cpp b/lib/CodeGen/CodePlacementOpt.cpp index 42b24a65b45..6fff12c0b0d 100644 --- a/lib/CodeGen/CodePlacementOpt.cpp +++ b/lib/CodeGen/CodePlacementOpt.cpp @@ -34,14 +34,6 @@ namespace { const TargetInstrInfo *TII; const TargetLowering *TLI; - /// ChangedMBBs - BBs which are modified by OptimizeIntraLoopEdges. - SmallPtrSet ChangedMBBs; - - /// UncondJmpMBBs - A list of BBs which are in loops and end with - /// unconditional branches. - SmallVector, 4> - UncondJmpMBBs; - public: static char ID; CodePlacementOpt() : MachineFunctionPass(&ID) {} @@ -58,7 +50,19 @@ namespace { } private: - bool OptimizeIntraLoopEdges(); + bool HasFallthrough(MachineBasicBlock *MBB); + bool HasAnalyzableTerminator(MachineBasicBlock *MBB); + void Splice(MachineFunction &MF, + MachineFunction::iterator InsertPt, + MachineFunction::iterator Begin, + MachineFunction::iterator End); + void UpdateTerminator(MachineBasicBlock *MBB); + bool EliminateUnconditionalJumpsToTop(MachineFunction &MF, + MachineLoop *L); + bool MoveDiscontiguousLoopBlocks(MachineFunction &MF, + MachineLoop *L); + bool OptimizeIntraLoopEdgesInLoopNest(MachineFunction &MF, MachineLoop *L); + bool OptimizeIntraLoopEdges(MachineFunction &MF); bool AlignLoops(MachineFunction &MF); bool AlignLoop(MachineFunction &MF, MachineLoop *L, unsigned Align); }; @@ -70,168 +74,354 @@ FunctionPass *llvm::createCodePlacementOptPass() { return new CodePlacementOpt(); } -/// OptimizeBackEdges - Place loop back edges to move unconditional branches -/// out of the loop. +/// HasFallthrough - Test whether the given branch has a fallthrough, either as +/// a plain fallthrough or as a fallthrough case of a conditional branch. /// -/// A: -/// ... -/// +bool CodePlacementOpt::HasFallthrough(MachineBasicBlock *MBB) { + MachineBasicBlock *TBB = 0, *FBB = 0; + SmallVector Cond; + if (TII->AnalyzeBranch(*MBB, TBB, FBB, Cond)) + return false; + // This conditional branch has no fallthrough. 
+ if (FBB) + return false; + // An unconditional branch has no fallthrough. + if (Cond.empty() && TBB) + return false; + // It has a fallthrough. + return true; +} + +/// HasAnalyzableTerminator - Test whether AnalyzeBranch will succeed on MBB. +/// This is called before major changes are begun to test whether it will be +/// possible to complete the changes. /// -/// B: --> loop header -/// ... -/// jcc C, [exit] +/// Target-specific code is hereby encouraged to make AnalyzeBranch succeed +/// whenever possible. /// -/// C: -/// ... -/// jmp B -/// -/// ==> -/// -/// A: -/// ... -/// jmp B -/// -/// C: -/// ... -/// -/// -/// B: --> loop header -/// ... -/// jcc C, [exit] -/// -bool CodePlacementOpt::OptimizeIntraLoopEdges() { - if (!TLI->shouldOptimizeCodePlacement()) +bool CodePlacementOpt::HasAnalyzableTerminator(MachineBasicBlock *MBB) { + // Conservatively ignore EH landing pads. + if (MBB->isLandingPad()) return false; + + // Ignore blocks which look like they might have EH-related control flow. + // At the time of this writing, there are blocks which AnalyzeBranch + // thinks end in single unconditional branches, yet which have two CFG + // successors. Code in this file is not prepared to reason about such things. + if (!MBB->empty() && MBB->back().getOpcode() == TargetInstrInfo::EH_LABEL) return false; - bool Changed = false; - for (unsigned i = 0, e = UncondJmpMBBs.size(); i != e; ++i) { - MachineBasicBlock *MBB = UncondJmpMBBs[i].first; - MachineBasicBlock *SuccMBB = UncondJmpMBBs[i].second; - MachineLoop *L = MLI->getLoopFor(MBB); - assert(L && "BB is expected to be in a loop!"); + // Aggressively handle return blocks and similar constructs. + if (MBB->succ_empty()) return true; - if (ChangedMBBs.count(MBB)) { - // BB has been modified, re-analyze.
- MachineBasicBlock *TBB = 0, *FBB = 0; - SmallVector Cond; - if (TII->AnalyzeBranch(*MBB, TBB, FBB, Cond) || !Cond.empty()) - continue; - if (MLI->getLoopFor(TBB) != L || TBB->isLandingPad()) - continue; - SuccMBB = TBB; + // Ask the target's AnalyzeBranch if it can handle this block. + MachineBasicBlock *TBB = 0, *FBB = 0; + SmallVector Cond; + // Make the the terminator is understood. + if (TII->AnalyzeBranch(*MBB, TBB, FBB, Cond)) + return false; + // Make sure we have the option of reversing the condition. + if (!Cond.empty() && TII->ReverseBranchCondition(Cond)) + return false; + return true; +} + +/// Splice - Move the sequence of instructions [Begin,End) to just before +/// InsertPt. Update branch instructions as needed to account for broken +/// fallthrough edges and to take advantage of newly exposed fallthrough +/// opportunities. +/// +void CodePlacementOpt::Splice(MachineFunction &MF, + MachineFunction::iterator InsertPt, + MachineFunction::iterator Begin, + MachineFunction::iterator End) { + assert(Begin != MF.begin() && End != MF.begin() && InsertPt != MF.begin() && + "Splice can't change the entry block!"); + MachineFunction::iterator OldBeginPrior = prior(Begin); + MachineFunction::iterator OldEndPrior = prior(End); + + MF.splice(InsertPt, Begin, End); + + UpdateTerminator(prior(Begin)); + UpdateTerminator(OldBeginPrior); + UpdateTerminator(OldEndPrior); +} + +/// UpdateTerminator - Update the terminator instructions in MBB to account +/// for changes to the layout. If the block previously used a fallthrough, +/// it may now need a branch, and if it previously used branching it may now +/// be able to use a fallthrough. +/// +void CodePlacementOpt::UpdateTerminator(MachineBasicBlock *MBB) { + // A block with no successors has no concerns with fall-through edges. 
+ if (MBB->succ_empty()) return; + + MachineBasicBlock *TBB = 0, *FBB = 0; + SmallVector Cond; + bool B = TII->AnalyzeBranch(*MBB, TBB, FBB, Cond); + (void) B; + assert(!B && "UpdateTerminators requires analyzable predecessors!"); + if (Cond.empty()) { + if (TBB) { + // The block has an unconditional branch. If its successor is now + // its layout successor, delete the branch. + if (MBB->isLayoutSuccessor(TBB)) + TII->RemoveBranch(*MBB); } else { - assert(MLI->getLoopFor(SuccMBB) == L && - "Successor is not in the same loop!"); + // The block has an unconditional fallthrough. If its successor is not + // its layout successor, insert a branch. + TBB = *MBB->succ_begin(); + if (!MBB->isLayoutSuccessor(TBB)) + TII->InsertBranch(*MBB, TBB, 0, Cond); } + } else { + if (FBB) { + // The block has a non-fallthrough conditional branch. If one of its + // successors is its layout successor, rewrite it to a fallthrough + // conditional branch. + if (MBB->isLayoutSuccessor(TBB)) { + TII->RemoveBranch(*MBB); + TII->ReverseBranchCondition(Cond); + TII->InsertBranch(*MBB, FBB, 0, Cond); + } else if (MBB->isLayoutSuccessor(FBB)) { + TII->RemoveBranch(*MBB); + TII->InsertBranch(*MBB, TBB, 0, Cond); + } + } else { + // The block has a fallthrough conditional branch. + MachineBasicBlock *MBBA = *MBB->succ_begin(); + MachineBasicBlock *MBBB = *next(MBB->succ_begin()); + if (MBBA == TBB) std::swap(MBBB, MBBA); + if (MBB->isLayoutSuccessor(TBB)) { + TII->RemoveBranch(*MBB); + TII->ReverseBranchCondition(Cond); + TII->InsertBranch(*MBB, MBBA, 0, Cond); + } else if (!MBB->isLayoutSuccessor(MBBA)) { + TII->RemoveBranch(*MBB); + TII->InsertBranch(*MBB, TBB, MBBA, Cond); + } + } + } +} - if (MBB->isLayoutSuccessor(SuccMBB)) { - // Successor is right after MBB, just eliminate the unconditional jmp. - // Can this happen? 
- TII->RemoveBranch(*MBB); - ChangedMBBs.insert(MBB); - ++NumIntraElim; +/// EliminateUnconditionalJumpsToTop - Move blocks which unconditionally jump +/// to the loop top to the top of the loop so that they have a fall through. +/// This can introduce a branch on entry to the loop, but it can eliminate a +/// branch within the loop. See the @simple case in +/// test/CodeGen/X86/loop-blocks.ll for an example of this. +bool CodePlacementOpt::EliminateUnconditionalJumpsToTop(MachineFunction &MF, + MachineLoop *L) { + bool Changed = false; + MachineBasicBlock *TopMBB = L->getTopBlock(); + + bool BotHasFallthrough = HasFallthrough(L->getBottomBlock()); + + if (TopMBB == MF.begin() || + HasAnalyzableTerminator(prior(MachineFunction::iterator(TopMBB)))) { + new_top: + for (MachineBasicBlock::pred_iterator PI = TopMBB->pred_begin(), + PE = TopMBB->pred_end(); PI != PE; ++PI) { + MachineBasicBlock *Pred = *PI; + if (Pred == TopMBB) continue; + if (HasFallthrough(Pred)) continue; + if (!L->contains(Pred)) continue; + + // Verify that we can analyze all the loop entry edges before beginning + // any changes which will require us to be able to analyze them. + if (Pred == MF.begin()) + continue; + if (!HasAnalyzableTerminator(Pred)) + continue; + if (!HasAnalyzableTerminator(prior(MachineFunction::iterator(Pred)))) + continue; + + // Move the block. Changed = true; - continue; - } - // Now check if the predecessor is fallthrough from any BB. If there is, - // that BB should be from outside the loop since edge will become a jmp.
- bool OkToMove = true; - MachineBasicBlock *FtMBB = 0, *FtTBB = 0, *FtFBB = 0; - SmallVector FtCond; - for (MachineBasicBlock::pred_iterator PI = SuccMBB->pred_begin(), - PE = SuccMBB->pred_end(); PI != PE; ++PI) { - MachineBasicBlock *PredMBB = *PI; - if (PredMBB->isLayoutSuccessor(SuccMBB)) { - if (TII->AnalyzeBranch(*PredMBB, FtTBB, FtFBB, FtCond)) { - OkToMove = false; + // Move it and all the blocks that can reach it via fallthrough edges + // exclusively, to keep existing fallthrough edges intact. + MachineFunction::iterator Begin = Pred; + MachineFunction::iterator End = next(Begin); + while (Begin != MF.begin()) { + MachineFunction::iterator Prior = prior(Begin); + if (Prior == MF.begin()) + break; + // Stop when a non-fallthrough edge is found. + if (!HasFallthrough(Prior)) + break; + // Stop if a block which could fall-through out of the loop is found. + if (Prior->isSuccessor(End)) + break; + // If we've reached the top, stop scanning. + if (Prior == MachineFunction::iterator(TopMBB)) { + // We know top currently has a fall through (because we just checked + // it) which would be lost if we do the transformation, so it isn't + // worthwhile to do the transformation unless it would expose a new + // fallthrough edge. + if (!Prior->isSuccessor(End)) + goto next_pred; + // Otherwise we can stop scanning and proceed to move the blocks. break; } - if (!FtTBB) - FtTBB = SuccMBB; - else if (!FtFBB) { - assert(FtFBB != SuccMBB && "Unexpected control flow!"); - FtFBB = SuccMBB; - } - - // A fallthrough. - FtMBB = PredMBB; - MachineLoop *PL = MLI->getLoopFor(PredMBB); - if (PL && (PL == L || PL->getLoopDepth() >= L->getLoopDepth())) - OkToMove = false; - - break; - } - } - - if (!OkToMove) - continue; - - // Is it profitable? If SuccMBB can fallthrough itself, that can be changed - // into a jmp.
- MachineBasicBlock *TBB = 0, *FBB = 0; - SmallVector Cond; - if (TII->AnalyzeBranch(*SuccMBB, TBB, FBB, Cond)) - continue; - if (!TBB && Cond.empty()) - TBB = next(MachineFunction::iterator(SuccMBB)); - else if (!FBB && !Cond.empty()) - FBB = next(MachineFunction::iterator(SuccMBB)); - - // This calculate the cost of the transformation. Also, it finds the *only* - // intra-loop edge if there is one. - int Cost = 0; - bool HasOneIntraSucc = true; - MachineBasicBlock *IntraSucc = 0; - for (MachineBasicBlock::succ_iterator SI = SuccMBB->succ_begin(), - SE = SuccMBB->succ_end(); SI != SE; ++SI) { - MachineBasicBlock *SSMBB = *SI; - if (MLI->getLoopFor(SSMBB) == L) { - if (!IntraSucc) - IntraSucc = SSMBB; - else - HasOneIntraSucc = false; + // If we hit a switch or something complicated, don't move anything + // for this predecessor. + if (!HasAnalyzableTerminator(prior(MachineFunction::iterator(Prior)))) + break; + // Ok, the block prior to Begin will be moved along with the rest. + // Extend the range to include it. + Begin = Prior; + ++NumIntraMoved; } - if (SuccMBB->isLayoutSuccessor(SSMBB)) - // This will become a jmp. - ++Cost; - else if (MBB->isLayoutSuccessor(SSMBB)) { - // One of the successor will become the new fallthrough. - if (SSMBB == FBB) { - FBB = 0; - --Cost; - } else if (!FBB && SSMBB == TBB && Cond.empty()) { - TBB = 0; - --Cost; - } else if (!Cond.empty() && !TII->ReverseBranchCondition(Cond)) { - assert(SSMBB == TBB); - TBB = FBB; - FBB = 0; - --Cost; - } - } - } - if (Cost) - continue; + // Move the blocks. + Splice(MF, TopMBB, Begin, End); - // Now, let's move the successor to below the BB to eliminate the jmp. 
- SuccMBB->moveAfter(MBB); - TII->RemoveBranch(*MBB); - TII->RemoveBranch(*SuccMBB); - if (TBB) - TII->InsertBranch(*SuccMBB, TBB, FBB, Cond); - ChangedMBBs.insert(MBB); - ChangedMBBs.insert(SuccMBB); - if (FtMBB) { - TII->RemoveBranch(*FtMBB); - TII->InsertBranch(*FtMBB, FtTBB, FtFBB, FtCond); - ChangedMBBs.insert(FtMBB); + // Update TopMBB. + TopMBB = L->getTopBlock(); + + // We have a new loop top. Iterate on it. We shouldn't have to do this + // too many times if BranchFolding has done a reasonable job. + goto new_top; + next_pred:; } - Changed = true; } - ++NumIntraMoved; + // If the loop previously didn't exit with a fall-through and it now does, + // we eliminated a branch. + if (Changed && + !BotHasFallthrough && + HasFallthrough(L->getBottomBlock())) { + ++NumIntraElim; + BotHasFallthrough = true; + } + + return Changed; +} + +/// MoveDiscontiguousLoopBlocks - Move any loop blocks that are not in the +/// portion of the loop contiguous with the header. This usually makes the loop +/// contiguous, provided that AnalyzeBranch can handle all the relevant +/// branching. See the @cfg_islands case in test/CodeGen/X86/loop-blocks.ll +/// for an example of this. +bool CodePlacementOpt::MoveDiscontiguousLoopBlocks(MachineFunction &MF, + MachineLoop *L) { + bool Changed = false; + MachineBasicBlock *TopMBB = L->getTopBlock(); + MachineBasicBlock *BotMBB = L->getBottomBlock(); + + // Determine a position to move orphaned loop blocks to. If TopMBB is not + // entered via fallthrough and BotMBB is exited via fallthrough, prepend them + // to the top of the loop to avoid losing that fallthrough. Otherwise append + // them to the bottom, even if it previously had a fallthrough, on the theory + // that it's worth an extra branch to keep the loop contiguous.
+ MachineFunction::iterator InsertPt = next(MachineFunction::iterator(BotMBB)); + bool InsertAtTop = false; + if (TopMBB != MF.begin() && + !HasFallthrough(prior(MachineFunction::iterator(TopMBB))) && + HasFallthrough(BotMBB)) { + InsertPt = TopMBB; + InsertAtTop = true; + } + + // Keep a record of which blocks are in the portion of the loop contiguous + // with the loop header. + SmallPtrSet ContiguousBlocks; + for (MachineFunction::iterator I = TopMBB, + E = next(MachineFunction::iterator(BotMBB)); I != E; ++I) + ContiguousBlocks.insert(I); + + // Find non-contiguous blocks and fix them. + if (InsertPt != MF.begin() && HasAnalyzableTerminator(prior(InsertPt))) + for (MachineLoop::block_iterator BI = L->block_begin(), BE = L->block_end(); + BI != BE; ++BI) { + MachineBasicBlock *BB = *BI; + + // Verify that we can analyze all the loop entry edges before beginning + // any changes which will require us to be able to analyze them. + if (!HasAnalyzableTerminator(BB)) + continue; + if (!HasAnalyzableTerminator(prior(MachineFunction::iterator(BB)))) + continue; + + // If the layout predecessor is part of the loop, this block will be + // processed along with it. This keeps them in their relative order. + if (BB != MF.begin() && + L->contains(prior(MachineFunction::iterator(BB)))) + continue; + + // Check to see if this block is already contiguous with the main + // portion of the loop. + if (!ContiguousBlocks.insert(BB)) + continue; + + // Move the block. + Changed = true; + + // Process this block and all loop blocks contiguous with it, to keep + // them in their relative order.
+ MachineFunction::iterator Begin = BB; + MachineFunction::iterator End = next(MachineFunction::iterator(BB)); + for (; End != MF.end(); ++End) { + if (!L->contains(End)) break; + if (!HasAnalyzableTerminator(End)) break; + ContiguousBlocks.insert(End); + ++NumIntraMoved; + } + + // If we're inserting at the bottom of the loop, and the code we're + // moving originally had fall-through successors, bring the successors + // up with the loop blocks to preserve the fall-through edges. + if (!InsertAtTop) + for (; End != MF.end(); ++End) { + if (L->contains(End)) break; + if (!HasAnalyzableTerminator(End)) break; + if (!HasFallthrough(prior(End))) break; + } + + // Move the blocks. This may invalidate TopMBB and/or BotMBB, but + // we don't need them anymore at this point. + Splice(MF, InsertPt, Begin, End); + } + + return Changed; +} + +/// OptimizeIntraLoopEdgesInLoopNest - Reposition loop blocks to minimize +/// intra-loop branching and to form contiguous loops. +/// +/// This code takes the approach of making minor changes to the existing +/// layout to fix specific loop-oriented problems. Also, it depends on +/// AnalyzeBranch, which can't understand complex control instructions. +/// +bool CodePlacementOpt::OptimizeIntraLoopEdgesInLoopNest(MachineFunction &MF, + MachineLoop *L) { + bool Changed = false; + + // Do optimization for nested loops. + for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I) + Changed |= OptimizeIntraLoopEdgesInLoopNest(MF, *I); + + // Do optimization for this loop. + Changed |= EliminateUnconditionalJumpsToTop(MF, L); + Changed |= MoveDiscontiguousLoopBlocks(MF, L); + + return Changed; +} + +/// OptimizeIntraLoopEdges - Reposition loop blocks to minimize +/// intra-loop branching and to form contiguous loops. +/// +bool CodePlacementOpt::OptimizeIntraLoopEdges(MachineFunction &MF) { + bool Changed = false; + + if (!TLI->shouldOptimizeCodePlacement()) + return Changed; + + // Do optimization for each loop in the function.
+ for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end(); + I != E; ++I) + if (!(*I)->getParentLoop()) + Changed |= OptimizeIntraLoopEdgesInLoopNest(MF, *I); + return Changed; } @@ -255,6 +445,8 @@ bool CodePlacementOpt::AlignLoops(MachineFunction &MF) { return Changed; } +/// AlignLoop - Align loop headers to target preferred alignments. +/// bool CodePlacementOpt::AlignLoop(MachineFunction &MF, MachineLoop *L, unsigned Align) { bool Changed = false; @@ -263,17 +455,7 @@ bool CodePlacementOpt::AlignLoop(MachineFunction &MF, MachineLoop *L, for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I) Changed |= AlignLoop(MF, *I, Align); - MachineBasicBlock *TopMBB = L->getHeader(); - if (TopMBB == MF.begin()) return Changed; - - MachineBasicBlock *PredMBB = prior(MachineFunction::iterator(TopMBB)); - while (MLI->getLoopFor(PredMBB) == L) { - TopMBB = PredMBB; - if (TopMBB == MF.begin()) return Changed; - PredMBB = prior(MachineFunction::iterator(TopMBB)); - } - - TopMBB->setAlignment(Align); + L->getTopBlock()->setAlignment(Align); Changed = true; ++NumLoopsAligned; @@ -288,30 +470,9 @@ bool CodePlacementOpt::runOnMachineFunction(MachineFunction &MF) { TLI = MF.getTarget().getTargetLowering(); TII = MF.getTarget().getInstrInfo(); - // Analyze the BBs first and keep track of BBs that - // end with an unconditional jmp to another block in the same loop. 
- for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { - MachineBasicBlock *MBB = I; - if (MBB->isLandingPad()) - continue; - MachineLoop *L = MLI->getLoopFor(MBB); - if (!L) - continue; - - MachineBasicBlock *TBB = 0, *FBB = 0; - SmallVector Cond; - if (TII->AnalyzeBranch(*MBB, TBB, FBB, Cond) || !Cond.empty()) - continue; - if (MLI->getLoopFor(TBB) == L && !TBB->isLandingPad()) - UncondJmpMBBs.push_back(std::make_pair(MBB, TBB)); - } - - bool Changed = OptimizeIntraLoopEdges(); + bool Changed = OptimizeIntraLoopEdges(MF); Changed |= AlignLoops(MF); - ChangedMBBs.clear(); - UncondJmpMBBs.clear(); - return Changed; } diff --git a/test/CodeGen/X86/2009-04-20-LinearScanOpt.ll b/test/CodeGen/X86/2009-04-20-LinearScanOpt.ll index 4d25b0f9831..d7b9463b5e1 100644 --- a/test/CodeGen/X86/2009-04-20-LinearScanOpt.ll +++ b/test/CodeGen/X86/2009-04-20-LinearScanOpt.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=x86_64-apple-darwin10.0 -relocation-model=pic -disable-fp-elim -stats |& grep asm-printer | grep 84 +; RUN: llc < %s -mtriple=x86_64-apple-darwin10.0 -relocation-model=pic -disable-fp-elim -stats |& grep asm-printer | grep 83 ; rdar://6802189 ; Test if linearscan is unfavoring registers for allocation to allow more reuse diff --git a/test/CodeGen/X86/discontiguous-loops.ll b/test/CodeGen/X86/discontiguous-loops.ll new file mode 100644 index 00000000000..479c450ca20 --- /dev/null +++ b/test/CodeGen/X86/discontiguous-loops.ll @@ -0,0 +1,72 @@ +; RUN: llc -verify-loop-info -verify-dom-info -march=x86-64 < %s +; PR5243 + +@.str96 = external constant [37 x i8], align 8 ; <[37 x i8]*> [#uses=1] + +define void @foo() nounwind { +bb: + br label %ybb1 + +ybb1: ; preds = %yybb13, %xbb6, %bb + switch i32 undef, label %bb18 [ + i32 150, label %ybb2 + i32 151, label %bb17 + i32 152, label %bb19 + i32 157, label %ybb8 + ] + +ybb2: ; preds = %ybb1 + %tmp = icmp eq i8** undef, null ; [#uses=1] + br i1 %tmp, label %bb3, label %xbb6 + +bb3: ; preds = %ybb2 + 
unreachable + +xbb4: ; preds = %xbb6 + store i32 0, i32* undef, align 8 + br i1 undef, label %xbb6, label %bb5 + +bb5: ; preds = %xbb4 + call fastcc void @decl_mode_check_failed() nounwind + unreachable + +xbb6: ; preds = %xbb4, %ybb2 + %tmp7 = icmp slt i32 undef, 0 ; [#uses=1] + br i1 %tmp7, label %xbb4, label %ybb1 + +ybb8: ; preds = %ybb1 + %tmp9 = icmp eq i8** undef, null ; [#uses=1] + br i1 %tmp9, label %bb10, label %ybb12 + +bb10: ; preds = %ybb8 + %tmp11 = load i8** undef, align 8 ; [#uses=1] + call void (i8*, ...)* @fatal(i8* getelementptr inbounds ([37 x i8]* @.str96, i64 0, i64 0), i8* %tmp11) nounwind + unreachable + +ybb12: ; preds = %ybb8 + br i1 undef, label %bb15, label %ybb13 + +ybb13: ; preds = %ybb12 + %tmp14 = icmp sgt i32 undef, 0 ; [#uses=1] + br i1 %tmp14, label %bb16, label %ybb1 + +bb15: ; preds = %ybb12 + call void (i8*, ...)* @fatal(i8* getelementptr inbounds ([37 x i8]* @.str96, i64 0, i64 0), i8* undef) nounwind + unreachable + +bb16: ; preds = %ybb13 + unreachable + +bb17: ; preds = %ybb1 + unreachable + +bb18: ; preds = %ybb1 + unreachable + +bb19: ; preds = %ybb1 + unreachable +} + +declare void @fatal(i8*, ...) + +declare fastcc void @decl_mode_check_failed() nounwind diff --git a/test/CodeGen/X86/loop-blocks.ll b/test/CodeGen/X86/loop-blocks.ll new file mode 100644 index 00000000000..af50bd95f3b --- /dev/null +++ b/test/CodeGen/X86/loop-blocks.ll @@ -0,0 +1,207 @@ +; RUN: llc < %s -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -asm-verbose=false | FileCheck %s + +; These tests check for loop branching structure, and that the loop align +; directive is placed in the expected place. + +; CodeGen should insert a branch into the middle of the loop in +; order to avoid a branch within the loop. 
+ +; CHECK: simple: +; CHECK: jmp .LBB1_1 +; CHECK-NEXT: align +; CHECK-NEXT: .LBB1_2: +; CHECK-NEXT: call loop_latch +; CHECK-NEXT: .LBB1_1: +; CHECK-NEXT: call loop_header + +define void @simple() nounwind { +entry: + br label %loop + +loop: + call void @loop_header() + %t0 = tail call i32 @get() + %t1 = icmp slt i32 %t0, 0 + br i1 %t1, label %done, label %bb + +bb: + call void @loop_latch() + br label %loop + +done: + call void @exit() + ret void +} + +; CodeGen should move block_a to the top of the loop so that it +; falls through into the loop, avoiding a branch within the loop. + +; CHECK: slightly_more_involved: +; CHECK: jmp .LBB2_1 +; CHECK-NEXT: align +; CHECK-NEXT: .LBB2_4: +; CHECK-NEXT: call bar99 +; CHECK-NEXT: .LBB2_1: +; CHECK-NEXT: call body + +define void @slightly_more_involved() nounwind { +entry: + br label %loop + +loop: + call void @body() + %t0 = call i32 @get() + %t1 = icmp slt i32 %t0, 2 + br i1 %t1, label %block_a, label %bb + +bb: + %t2 = call i32 @get() + %t3 = icmp slt i32 %t2, 99 + br i1 %t3, label %exit, label %loop + +block_a: + call void @bar99() + br label %loop + +exit: + call void @exit() + ret void +} + +; Same as slightly_more_involved, but block_a is now a CFG diamond with +; fallthrough edges which should be preserved. 
+ +; CHECK: yet_more_involved: +; CHECK: jmp .LBB3_1 +; CHECK-NEXT: align +; CHECK-NEXT: .LBB3_7: +; CHECK-NEXT: call block_a_true_func +; CHECK-NEXT: jmp .LBB3_4 +; CHECK-NEXT: .LBB3_2: +; CHECK-NEXT: call bar99 +; CHECK-NEXT: call get +; CHECK-NEXT: cmpl $2999, %eax +; CHECK-NEXT: jle .LBB3_7 +; CHECK-NEXT: call block_a_false_func +; CHECK-NEXT: .LBB3_4: +; CHECK-NEXT: call block_a_merge_func +; CHECK-NEXT: .LBB3_1: +; CHECK-NEXT: call body + +define void @yet_more_involved() nounwind { +entry: + br label %loop + +loop: + call void @body() + %t0 = call i32 @get() + %t1 = icmp slt i32 %t0, 2 + br i1 %t1, label %block_a, label %bb + +bb: + %t2 = call i32 @get() + %t3 = icmp slt i32 %t2, 99 + br i1 %t3, label %exit, label %loop + +block_a: + call void @bar99() + %z0 = call i32 @get() + %z1 = icmp slt i32 %z0, 3000 + br i1 %z1, label %block_a_true, label %block_a_false + +block_a_true: + call void @block_a_true_func() + br label %block_a_merge + +block_a_false: + call void @block_a_false_func() + br label %block_a_merge + +block_a_merge: + call void @block_a_merge_func() + br label %loop + +exit: + call void @exit() + ret void +} + +; CodeGen should move the CFG islands that are part of the loop but don't +; conveniently fit anywhere so that they are at least contiguous with the +; loop. 
+ +; CHECK: cfg_islands: +; CHECK: jmp .LBB4_1 +; CHECK-NEXT: align +; CHECK-NEXT: .LBB4_7: +; CHECK-NEXT: call bar100 +; CHECK-NEXT: jmp .LBB4_1 +; CHECK-NEXT: .LBB4_8: +; CHECK-NEXT: call bar101 +; CHECK-NEXT: jmp .LBB4_1 +; CHECK-NEXT: .LBB4_9: +; CHECK-NEXT: call bar102 +; CHECK-NEXT: jmp .LBB4_1 +; CHECK-NEXT: .LBB4_5: +; CHECK-NEXT: call loop_latch +; CHECK-NEXT: .LBB4_1: +; CHECK-NEXT: call loop_header + +define void @cfg_islands() nounwind { +entry: + br label %loop + +loop: + call void @loop_header() + %t0 = call i32 @get() + %t1 = icmp slt i32 %t0, 100 + br i1 %t1, label %block100, label %bb + +bb: + %t2 = call i32 @get() + %t3 = icmp slt i32 %t2, 101 + br i1 %t3, label %block101, label %bb1 + +bb1: + %t4 = call i32 @get() + %t5 = icmp slt i32 %t4, 102 + br i1 %t5, label %block102, label %bb2 + +bb2: + %t6 = call i32 @get() + %t7 = icmp slt i32 %t6, 103 + br i1 %t7, label %exit, label %bb3 + +bb3: + call void @loop_latch() + br label %loop + +exit: + call void @exit() + ret void + +block100: + call void @bar100() + br label %loop + +block101: + call void @bar101() + br label %loop + +block102: + call void @bar102() + br label %loop +} + +declare void @bar99() nounwind +declare void @bar100() nounwind +declare void @bar101() nounwind +declare void @bar102() nounwind +declare void @body() nounwind +declare void @exit() nounwind +declare void @loop_header() nounwind +declare void @loop_latch() nounwind +declare i32 @get() nounwind +declare void @block_a_true_func() nounwind +declare void @block_a_false_func() nounwind +declare void @block_a_merge_func() nounwind