Fix rdar://9289512 - not folding load into compare at -O0

The basic issue here is that bottom-up isel was matching the branch
and compare together, but failing to fold the load into the
branch/compare combo.  Fixing this (by allowing folding into any
instruction of a selected sequence) allows us to produce things like:

cmpb    $0, 52(%rax)
je      LBB4_2

instead of:

movb    52(%rax), %cl
cmpb    $0, %cl
je      LBB4_2
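
For reference, here is a minimal C++ reproducer (hypothetical: the type,
the function names, and the 52-byte padding are invented so the field lands
at offset 52, matching the cmpb above) that produces this load/compare/branch
pattern at -O0:

struct State {
  char pad[52]; // padding so 'flag' sits at offset 52, as in 52(%rax)
  char flag;    // the single-use byte load that should fold into cmpb
};

void consume(State *s);

void test(State *s) {
  if (s->flag)  // load + icmp + br; with this patch the load folds away
    consume(s);
}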

This makes the generated -O0 code run a bit faster, but also speeds up
compile time by putting less pressure on the register allocator and 
generating less code.

This was one of the biggest classes of missed load folding.  Implementing
this shrinks 176.gcc's c-decl.s (as a random example) by about 4% in
(verbose-asm) line count.

llvm-svn: 129656
Chris Lattner 2011-04-17 06:35:44 +00:00
parent 1fe5f78b7e
commit 5e00f501ff
3 changed files with 65 additions and 17 deletions


@@ -280,7 +280,8 @@ private:
   void PrepareEHLandingPad();
   void SelectAllBasicBlocks(const Function &Fn);
-  bool TryToFoldFastISelLoad(const LoadInst *LI, FastISel *FastIS);
+  bool TryToFoldFastISelLoad(const LoadInst *LI, const Instruction *FoldInst,
+                             FastISel *FastIS);
   void FinishBasicBlock();

   void SelectBasicBlock(BasicBlock::const_iterator Begin,


@@ -748,9 +748,31 @@ void SelectionDAGISel::PrepareEHLandingPad() {
 /// TryToFoldFastISelLoad - We're checking to see if we can fold the specified
+/// load into the specified FoldInst. Note that we could have a sequence where
+/// multiple LLVM IR instructions are folded into the same machineinstr. For
+/// example we could have:
+///   A: x = load i32 *P
+///   B: y = icmp A, 42
+///   C: br y, ...
+///
+/// In this scenario, LI is "A", and FoldInst is "C". We know about "B" (and
+/// any other folded instructions) because it is between A and C.
+///
+/// If we succeed in folding the load into the operation, return true.
+///
 bool SelectionDAGISel::TryToFoldFastISelLoad(const LoadInst *LI,
+                                             const Instruction *FoldInst,
                                              FastISel *FastIS) {
+  SmallPtrSet<const Instruction*, 4> FoldedInsts;
+  for (BasicBlock::const_iterator II = FoldInst; &*II != LI; --II)
+    FoldedInsts.insert(II);
+
+  // We know that the load has a single use, but don't know what it is. If it
+  // isn't one of the folded instructions, then we can't succeed here.
+  if (!FoldedInsts.count(LI->use_back()))
+    return false;
+
   // Don't try to fold volatile loads. Target has to deal with alignment
   // constraints.
   if (LI->isVolatile()) return false;
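
To make the FoldedInsts check concrete, here is a small standalone C++
sketch (toy std:: containers and invented names, not LLVM's API) of the same
idea: collect every instruction from FoldInst back to, but not including,
the load, then require the load's single user to be in that set:

#include <cassert>
#include <set>
#include <string>
#include <vector>

int main() {
  // The block from the comment above: A: load, B: icmp (uses A), C: br (uses B).
  std::vector<std::string> block = {"A: load", "B: icmp", "C: br"};
  std::string loadUser = "B: icmp"; // the load's single user

  // Walk backward from FoldInst (C) toward the load (A), collecting the
  // instructions that were all selected into one machine instruction.
  std::set<std::string> foldedInsts;
  for (size_t i = block.size() - 1; block[i] != "A: load"; --i)
    foldedInsts.insert(block[i]); // inserts "C: br", then "B: icmp"

  // The fold is only safe if the load actually feeds the folded sequence.
  assert(foldedInsts.count(loadUser));
  return 0;
}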
@@ -835,10 +857,10 @@ static void CheckLineNumbers(const MachineBasicBlock *MBB) {
 /// Return false if it needs to be emitted.
 static bool isFoldedOrDeadInstruction(const Instruction *I,
                                       FunctionLoweringInfo *FuncInfo) {
-  return !I->mayWriteToMemory() &&
-         !isa<TerminatorInst>(I) &&
-         !isa<DbgInfoIntrinsic>(I) &&
-         !FuncInfo->isExportedInst(I);
+  return !I->mayWriteToMemory() &&      // Side-effecting instructions aren't folded.
+         !isa<TerminatorInst>(I) &&     // Terminators aren't folded.
+         !isa<DbgInfoIntrinsic>(I) &&   // Debug instructions aren't folded.
+         !FuncInfo->isExportedInst(I);  // Exported instrs must be computed.
 }

 void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
@@ -930,16 +952,20 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
         // Try to select the instruction with FastISel.
         if (FastIS->SelectInstruction(Inst)) {
-          // If fast isel succeeded, check to see if there is a single-use
-          // non-volatile load right before the selected instruction, and see if
-          // the load is used by the instruction. If so, try to fold it.
-          const Instruction *BeforeInst = 0;
-          if (Inst != Begin)
-            BeforeInst = llvm::prior(llvm::prior(BI));
-          if (BeforeInst && isa<LoadInst>(BeforeInst) &&
-              BeforeInst->hasOneUse() && *BeforeInst->use_begin() == Inst &&
-              TryToFoldFastISelLoad(cast<LoadInst>(BeforeInst), FastIS))
-            --BI; // If we succeeded, don't re-select the load.
+          // If fast isel succeeded, skip over all the folded instructions, and
+          // then see if there is a load right before the selected instructions.
+          // Try to fold the load if so.
+          const Instruction *BeforeInst = Inst;
+          while (BeforeInst != Begin) {
+            BeforeInst = llvm::prior(BasicBlock::const_iterator(BeforeInst));
+            if (!isFoldedOrDeadInstruction(BeforeInst, FuncInfo))
+              break;
+          }
+          if (BeforeInst != Inst && isa<LoadInst>(BeforeInst) &&
+              BeforeInst->hasOneUse() &&
+              TryToFoldFastISelLoad(cast<LoadInst>(BeforeInst), Inst, FastIS))
+            // If we succeeded, don't re-select the load.
+            BI = llvm::next(BasicBlock::const_iterator(BeforeInst));
           continue;
         }
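
The shape of that new walk, as a self-contained C++ sketch (toy types and
invented names, not LLVM's API): step backward from the just-selected
instruction over everything that was folded or is dead, and if the walk
stops on a single-use load, that load is the folding candidate:

#include <vector>

struct ToyInst {
  bool isLoad = false;
  bool foldedOrDead = false; // stand-in for isFoldedOrDeadInstruction()
  int numUses = 0;
};

// Return the index of a load that may fold into block[selected], else -1.
int findFoldCandidate(const std::vector<ToyInst> &block, int selected) {
  int before = selected;
  while (before != 0) {             // "BeforeInst != Begin"
    --before;                       // "llvm::prior(...)"
    if (!block[before].foldedOrDead)
      break;                        // first instruction that must be emitted
  }
  if (before != selected && block[before].isLoad && block[before].numUses == 1)
    return before;                  // the real code calls TryToFoldFastISelLoad
  return -1;
}

On success, the real code also resets the iterator to just past the load so
the load is not selected a second time.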


@@ -1,4 +1,4 @@
-; RUN: llc < %s | FileCheck %s
+; RUN: llc < %s -fast-isel -O0 -regalloc=fast | FileCheck %s
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-darwin10.0.0"

@@ -12,3 +12,24 @@ define i32 @test1(i32 %i) nounwind ssp {
 ; CHECK: test1:
 ; CHECK: andl $8,
+
+; rdar://9289512 - The load should fold into the compare.
+define void @test2(i64 %x) nounwind ssp {
+entry:
+  %x.addr = alloca i64, align 8
+  store i64 %x, i64* %x.addr, align 8
+  %tmp = load i64* %x.addr, align 8
+  %cmp = icmp sgt i64 %tmp, 42
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void
+}
+
+; CHECK: test2:
+; CHECK: movq %rdi, -8(%rsp)
+; CHECK: cmpq $42, -8(%rsp)