[X86] Implement the support for shrink-wrapping.

With this patch the x86 backend is now shrink-wrapping capable
and this functionality can be tested by using the
-enable-shrink-wrap switch.
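For reference, the new code path can be exercised directly with llc,
mirroring the RUN lines of the added test (the input file name here is
just a placeholder):

  llc -enable-shrink-wrap=true input.ll -o -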

The next step is to add more tests and enable shrink-wrapping by
default for x86.

Related to <rdar://problem/20821487>

llvm-svn: 238293
Quentin Colombet 2015-05-27 06:28:41 +00:00
parent 8083588a7e
commit aa8020752e
4 changed files with 668 additions and 35 deletions


@@ -88,8 +88,9 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
// standard x86_64 and NaCl use 64-bit frame/stack pointers, x32 - 32-bit.
const bool Uses64BitFramePtr =
STI->isTarget64BitLP64() || STI->isTargetNaCl64();
bool UseLEAForSP =
X86FL->useLEAForSPInProlog(*MBB.getParent());
// Check if we should use LEA for SP.
bool UseLEAForSP = STI->useLeaForSP() &&
X86FL->canUseLEAForSPInEpilogue(*MBB.getParent());
unsigned StackPtr = TRI->getStackRegister();
// Check for possible merge with preceding ADD instruction.
StackAdj += X86FrameLowering::mergeSPUpdates(MBB, MBBI, StackPtr, true);


@@ -565,7 +565,6 @@ static uint64_t calculateMaxStackAlign(const MachineFunction &MF) {
void X86FrameLowering::emitPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
MachineBasicBlock::iterator MBBI = MBB.begin();
MachineFrameInfo *MFI = MF.getFrameInfo();
const Function *Fn = MF.getFunction();
@@ -965,15 +964,38 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
}
}
bool X86FrameLowering::useLEAForSPInProlog(const MachineFunction &MF) const {
bool X86FrameLowering::canUseLEAForSPInEpilogue(
const MachineFunction &MF) const {
// We can't use LEA instructions for adjusting the stack pointer if this is a
// leaf function in the Win64 ABI. Only ADD instructions may be used to
// deallocate the stack.
// This means that we can use LEA for SP in two situations:
// 1. We *aren't* using the Win64 ABI which means we are free to use LEA.
// 2. We *have* a frame pointer which means we are permitted to use LEA.
return MF.getSubtarget<X86Subtarget>().useLeaForSP() &&
(!MF.getTarget().getMCAsmInfo()->usesWindowsCFI() || hasFP(MF));
return !MF.getTarget().getMCAsmInfo()->usesWindowsCFI() || hasFP(MF);
}
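// Illustration (not part of this commit) of the two epilogue forms this
// choice is about; LEA leaves EFLAGS untouched while ADD clobbers them:
//   leaq N(%rsp), %rsp   // disallowed for Win64 leaf frames without a FP
//   addq $N, %rsp        // always usable to deallocate, but redefines EFLAGS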
/// Check whether or not the terminators of \p MBB need to read EFLAGS.
static bool terminatorsNeedFlagsAsInput(const MachineBasicBlock &MBB) {
for (const MachineInstr &MI : MBB.terminators()) {
bool BreakNext = false;
for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg())
continue;
unsigned Reg = MO.getReg();
if (Reg != X86::EFLAGS)
continue;
// This terminator reads EFLAGS that no previous
// terminator defines.
if (!MO.isDef())
return true;
BreakNext = true;
}
if (BreakNext)
break;
}
return false;
}
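// Example (illustration only, not code from this commit): for a block ending in
//   testq %rax, %rax     <- defines EFLAGS, but is not a terminator
//   je LBB0_4            <- terminator that reads EFLAGS
// the conditional jump reads EFLAGS that no earlier terminator defines, so this
// helper returns true and a flag-clobbering ADD must not be inserted in front
// of the terminators.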
void X86FrameLowering::emitEpilogue(MachineFunction &MF,
@@ -983,9 +1005,10 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
const X86RegisterInfo *RegInfo = STI.getRegisterInfo();
const TargetInstrInfo &TII = *STI.getInstrInfo();
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
assert(MBBI != MBB.end() && "Returning block has no instructions");
DebugLoc DL = MBBI->getDebugLoc();
MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
DebugLoc DL;
if (MBBI != MBB.end())
DL = MBBI->getDebugLoc();
bool Is64Bit = STI.is64Bit();
// standard x86_64 and NaCl use 64-bit frame/stack pointers, x32 - 32-bit.
const bool Uses64BitFramePtr = STI.isTarget64BitLP64() || STI.isTargetNaCl64();
@@ -999,25 +1022,18 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
bool IsWinEH = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
bool NeedsWinEH = IsWinEH && MF.getFunction()->needsUnwindTableEntry();
bool UseLEAForSP = useLEAForSPInProlog(MF);
switch (MBBI->getOpcode()) {
default:
llvm_unreachable("Can only insert epilogue into returning blocks");
case X86::RETQ:
case X86::RETL:
case X86::RETIL:
case X86::RETIQ:
case X86::TCRETURNdi:
case X86::TCRETURNri:
case X86::TCRETURNmi:
case X86::TCRETURNdi64:
case X86::TCRETURNri64:
case X86::TCRETURNmi64:
case X86::EH_RETURN:
case X86::EH_RETURN64:
break; // These are ok
}
bool UseLEAForSP = canUseLEAForSPInEpilogue(MF);
// If we can use LEA for SP but we shouldn't, check that none
// of the terminators uses EFLAGS. Otherwise we would insert
// an ADD that would redefine EFLAGS and break the condition.
// Alternatively, we could move the ADD, but this may not be possible
// and is an optimization anyway.
if (UseLEAForSP && !MF.getSubtarget<X86Subtarget>().useLeaForSP())
UseLEAForSP = terminatorsNeedFlagsAsInput(MBB);
// If that assert fires, it means we are not doing the right thing
// in canUseAsEpilogue.
assert((UseLEAForSP || !terminatorsNeedFlagsAsInput(MBB)) &&
"We shouldn't have allowed this insertion point");
// Get the number of bytes to allocate from the FrameInfo.
uint64_t StackSize = MFI->getStackSize();
@@ -1056,7 +1072,8 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
}
MachineBasicBlock::iterator FirstCSPop = MBBI;
DL = MBBI->getDebugLoc();
if (MBBI != MBB.end())
DL = MBBI->getDebugLoc();
// If there is an ADD32ri or SUB32ri of ESP immediately before this
// instruction, merge the two instructions.
@@ -1514,8 +1531,6 @@ static const uint64_t kSplitStackAvailable = 256;
void X86FrameLowering::adjustForSegmentedStacks(
MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
assert(&PrologueMBB == &MF.front() &&
"Shrink-wrapping is not implemented yet");
MachineFrameInfo *MFI = MF.getFrameInfo();
const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
const TargetInstrInfo &TII = *STI.getInstrInfo();
@@ -1835,8 +1850,6 @@ void X86FrameLowering::adjustForHiPEPrologue(
// If the stack frame needed is larger than the guaranteed then runtime checks
// and calls to "inc_stack_0" BIF should be inserted in the assembly prologue.
if (MaxStack > Guaranteed) {
assert(&PrologueMBB == &MF.front() &&
"Shrink-wrapping is not implemented yet");
MachineBasicBlock *stackCheckMBB = MF.CreateMachineBasicBlock();
MachineBasicBlock *incStackMBB = MF.CreateMachineBasicBlock();
@@ -1979,3 +1992,15 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
}
}
bool X86FrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
assert(MBB.getParent() && "Block is not attached to a function!");
if (canUseLEAForSPInEpilogue(*MBB.getParent()))
return true;
// If we cannot use LEA to adjust SP, we may need to use ADD, which
// clobbers EFLAGS. Check that none of the terminators reads
// EFLAGS, and if one does, conservatively assume it is not
// safe to insert the epilogue here.
return !terminatorsNeedFlagsAsInput(MBB);
}
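// Hypothetical caller sketch (assumption, not code from this commit): the
// generic shrink-wrapping pass is expected to consult this hook when picking
// a restore point, roughly:
//   if (!TFI->canUseAsEpilogue(*RestoreBlock))
//     RestoreBlock = nullptr; // try another block or skip shrink-wrapping
// which is why a block whose terminators read EFLAGS is rejected here when
// LEA cannot be used.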


@@ -96,8 +96,15 @@ public:
const TargetInstrInfo &TII,
const TargetRegisterInfo &TRI);
/// Check that LEA can be used on SP in a prologue sequence for \p MF.
bool useLEAForSPInProlog(const MachineFunction &MF) const;
/// Check that LEA can be used on SP in an epilogue sequence for \p MF.
bool canUseLEAForSPInEpilogue(const MachineFunction &MF) const;
/// Check whether or not the given \p MBB can be used as an epilogue
/// for the target.
/// The epilogue will be inserted before the first terminator of that block.
/// This method is used by the shrink-wrapping pass to decide if
/// \p MBB will be correctly handled by the target.
bool canUseAsEpilogue(const MachineBasicBlock &MBB) const override;
private:
/// convertArgMovsToPushes - This method tries to convert a call sequence


@@ -0,0 +1,600 @@
; RUN: llc %s -o - -enable-shrink-wrap=true | FileCheck %s --check-prefix=CHECK --check-prefix=ENABLE
; RUN: llc %s -o - -enable-shrink-wrap=false | FileCheck %s --check-prefix=CHECK --check-prefix=DISABLE
;
; Note: Lots of tests use inline asm instead of regular calls.
; This gives better control over what register allocation will do.
; Otherwise, we may get spills right in the entry block, defeating
; shrink-wrapping. Moreover, some of the inline asm statements (nop)
; are here to ensure that the related paths do not end up as critical
; edges.
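; As an illustration (pattern taken from the tests below), this is the kind of
; inline asm nop the note refers to; it sits in its own preheader block so
; that the path into the loop does not end up as a critical edge:
;   for.preheader:
;     tail call void asm "nop", ""()
;     br label %for.body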
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
target triple = "x86_64-apple-macosx"
; Initial motivating example: Simple diamond with a call just on one side.
; CHECK-LABEL: foo:
;
; Compare the arguments and jump to exit.
; No prologue needed.
; ENABLE: movl %edi, [[ARG0CPY:%e[a-z]+]]
; ENABLE-NEXT: cmpl %esi, [[ARG0CPY]]
; ENABLE-NEXT: jge [[EXIT_LABEL:LBB[0-9_]+]]
;
; Prologue code.
; (What we push does not matter. It should be some random scratch register.)
; CHECK: pushq
;
; Compare the arguments and jump to exit.
; After the prologue is set.
; DISABLE: movl %edi, [[ARG0CPY:%e[a-z]+]]
; DISABLE-NEXT: cmpl %esi, [[ARG0CPY]]
; DISABLE-NEXT: jge [[EXIT_LABEL:LBB[0-9_]+]]
;
; Store %a in the alloca.
; CHECK: movl [[ARG0CPY]], 4(%rsp)
; Set the alloca address in the second argument.
; CHECK-NEXT: leaq 4(%rsp), %rsi
; Set the first argument to zero.
; CHECK-NEXT: xorl %edi, %edi
; CHECK-NEXT: callq _doSomething
;
; With shrink-wrapping, epilogue is just after the call.
; ENABLE-NEXT: addq $8, %rsp
;
; CHECK: [[EXIT_LABEL]]:
;
; Without shrink-wrapping, epilogue is in the exit block.
; Epilogue code. (What we pop does not matter.)
; DISABLE-NEXT: popq
;
; CHECK-NEXT: retq
define i32 @foo(i32 %a, i32 %b) {
%tmp = alloca i32, align 4
%tmp2 = icmp slt i32 %a, %b
br i1 %tmp2, label %true, label %false
true:
store i32 %a, i32* %tmp, align 4
%tmp4 = call i32 @doSomething(i32 0, i32* %tmp)
br label %false
false:
%tmp.0 = phi i32 [ %tmp4, %true ], [ %a, %0 ]
ret i32 %tmp.0
}
; Function Attrs: optsize
declare i32 @doSomething(i32, i32*)
; Check that we do not perform the restore inside the loop whereas the save
; is outside.
; CHECK-LABEL: freqSaveAndRestoreOutsideLoop:
;
; Shrink-wrapping allows us to skip the prologue in the else case.
; ENABLE: testl %edi, %edi
; ENABLE: je [[ELSE_LABEL:LBB[0-9_]+]]
;
; Prologue code.
; Make sure we save the CSR used in the inline asm: rbx.
; CHECK: pushq %rbx
;
; DISABLE: testl %edi, %edi
; DISABLE: je [[ELSE_LABEL:LBB[0-9_]+]]
;
; SUM is in %esi because it is coalesced with the second
; argument on the else path.
; CHECK: xorl [[SUM:%esi]], [[SUM]]
; CHECK-NEXT: movl $10, [[IV:%e[a-z]+]]
;
; Next BB.
; CHECK: [[LOOP:LBB[0-9_]+]]: ## %for.body
; CHECK: movl $1, [[TMP:%e[a-z]+]]
; CHECK: addl [[TMP]], [[SUM]]
; CHECK-NEXT: decl [[IV]]
; CHECK-NEXT: jne [[LOOP]]
;
; Next BB.
; SUM << 3.
; CHECK: shll $3, [[SUM]]
;
; Jump to epilogue.
; DISABLE: jmp [[EPILOG_BB:LBB[0-9_]+]]
;
; DISABLE: [[ELSE_LABEL]]: ## %if.else
; Shift second argument by one and store into returned register.
; DISABLE: addl %esi, %esi
; DISABLE: [[EPILOG_BB]]: ## %if.end
;
; Epilogue code.
; CHECK-DAG: popq %rbx
; CHECK-DAG: movl %esi, %eax
; CHECK: retq
;
; ENABLE: [[ELSE_LABEL]]: ## %if.else
; Shift second argument by one and store into returned register.
; ENABLE: addl %esi, %esi
; ENABLE-NEXT: movl %esi, %eax
; ENABLE-NEXT: retq
define i32 @freqSaveAndRestoreOutsideLoop(i32 %cond, i32 %N) {
entry:
%tobool = icmp eq i32 %cond, 0
br i1 %tobool, label %if.else, label %for.preheader
for.preheader:
tail call void asm "nop", ""()
br label %for.body
for.body: ; preds = %entry, %for.body
%i.05 = phi i32 [ %inc, %for.body ], [ 0, %for.preheader ]
%sum.04 = phi i32 [ %add, %for.body ], [ 0, %for.preheader ]
%call = tail call i32 asm "movl $$1, $0", "=r,~{ebx}"()
%add = add nsw i32 %call, %sum.04
%inc = add nuw nsw i32 %i.05, 1
%exitcond = icmp eq i32 %inc, 10
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body
%shl = shl i32 %add, 3
br label %if.end
if.else: ; preds = %entry
%mul = shl nsw i32 %N, 1
br label %if.end
if.end: ; preds = %if.else, %for.end
%sum.1 = phi i32 [ %shl, %for.end ], [ %mul, %if.else ]
ret i32 %sum.1
}
declare i32 @something(...)
; Check that we do not perform the shrink-wrapping inside the loop even
; though that would be legal. The cost model must prevent that.
; CHECK-LABEL: freqSaveAndRestoreOutsideLoop2:
; Prologue code.
; Make sure we save the CSR used in the inline asm: rbx.
; CHECK: pushq %rbx
; CHECK: nop
; CHECK: xorl [[SUM:%e[a-z]+]], [[SUM]]
; CHECK-NEXT: movl $10, [[IV:%e[a-z]+]]
; Next BB.
; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ## %for.body
; CHECK: movl $1, [[TMP:%e[a-z]+]]
; CHECK: addl [[TMP]], [[SUM]]
; CHECK-NEXT: decl [[IV]]
; CHECK-NEXT: jne [[LOOP_LABEL]]
; Next BB.
; CHECK: ## %for.exit
; CHECK: nop
; CHECK: popq %rbx
; CHECK-NEXT: retq
define i32 @freqSaveAndRestoreOutsideLoop2(i32 %cond) {
entry:
br label %for.preheader
for.preheader:
tail call void asm "nop", ""()
br label %for.body
for.body: ; preds = %for.body, %entry
%i.04 = phi i32 [ 0, %for.preheader ], [ %inc, %for.body ]
%sum.03 = phi i32 [ 0, %for.preheader ], [ %add, %for.body ]
%call = tail call i32 asm "movl $$1, $0", "=r,~{ebx}"()
%add = add nsw i32 %call, %sum.03
%inc = add nuw nsw i32 %i.04, 1
%exitcond = icmp eq i32 %inc, 10
br i1 %exitcond, label %for.exit, label %for.body
for.exit:
tail call void asm "nop", ""()
br label %for.end
for.end: ; preds = %for.body
ret i32 %add
}
; Check with a more complex case that we do not have the save within the loop
; and the restore outside.
; CHECK-LABEL: loopInfoSaveOutsideLoop:
;
; ENABLE: testl %edi, %edi
; ENABLE-NEXT: je [[ELSE_LABEL:LBB[0-9_]+]]
;
; Prologue code.
; Make sure we save the CSR used in the inline asm: rbx.
; CHECK: pushq %rbx
;
; DISABLE: testl %edi, %edi
; DISABLE-NEXT: je [[ELSE_LABEL:LBB[0-9_]+]]
;
; CHECK: nop
; CHECK: xorl [[SUM:%esi]], [[SUM]]
; CHECK-NEXT: movl $10, [[IV:%e[a-z]+]]
;
; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ## %for.body
; CHECK: movl $1, [[TMP:%e[a-z]+]]
; CHECK: addl [[TMP]], [[SUM]]
; CHECK-NEXT: decl [[IV]]
; CHECK-NEXT: jne [[LOOP_LABEL]]
; Next BB.
; CHECK: nop
; CHECK: shll $3, [[SUM]]
;
; DISABLE: jmp [[EPILOG_BB:LBB[0-9_]+]]
;
; DISABLE: [[ELSE_LABEL]]: ## %if.else
; Shift second argument by one and store into returned register.
; DISABLE: addl %esi, %esi
; DISABLE: [[EPILOG_BB]]: ## %if.end
;
; Epilogue code.
; CHECK-DAG: popq %rbx
; CHECK-DAG: movl %esi, %eax
; CHECK: retq
;
; ENABLE: [[ELSE_LABEL]]: ## %if.else
; Shift second argument by one and store into returned register.
; ENABLE: addl %esi, %esi
; ENABLE-NEXT: movl %esi, %eax
; ENABLE-NEXT: retq
define i32 @loopInfoSaveOutsideLoop(i32 %cond, i32 %N) {
entry:
%tobool = icmp eq i32 %cond, 0
br i1 %tobool, label %if.else, label %for.preheader
for.preheader:
tail call void asm "nop", ""()
br label %for.body
for.body: ; preds = %entry, %for.body
%i.05 = phi i32 [ %inc, %for.body ], [ 0, %for.preheader ]
%sum.04 = phi i32 [ %add, %for.body ], [ 0, %for.preheader ]
%call = tail call i32 asm "movl $$1, $0", "=r,~{ebx}"()
%add = add nsw i32 %call, %sum.04
%inc = add nuw nsw i32 %i.05, 1
%exitcond = icmp eq i32 %inc, 10
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body
tail call void asm "nop", "~{ebx}"()
%shl = shl i32 %add, 3
br label %if.end
if.else: ; preds = %entry
%mul = shl nsw i32 %N, 1
br label %if.end
if.end: ; preds = %if.else, %for.end
%sum.1 = phi i32 [ %shl, %for.end ], [ %mul, %if.else ]
ret i32 %sum.1
}
declare void @somethingElse(...)
; Check with a more complex case that we do not have the restore within the loop
; and the save outside.
; CHECK-LABEL: loopInfoRestoreOutsideLoop:
;
; ENABLE: testl %edi, %edi
; ENABLE-NEXT: je [[ELSE_LABEL:LBB[0-9_]+]]
;
; Prologue code.
; Make sure we save the CSR used in the inline asm: rbx.
; CHECK: pushq %rbx
;
; DISABLE: testl %edi, %edi
; DISABLE-NEXT: je [[ELSE_LABEL:LBB[0-9_]+]]
;
; CHECK: nop
; CHECK: xorl [[SUM:%esi]], [[SUM]]
; CHECK-NEXT: movl $10, [[IV:%e[a-z]+]]
;
; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ## %for.body
; CHECK: movl $1, [[TMP:%e[a-z]+]]
; CHECK: addl [[TMP]], [[SUM]]
; CHECK-NEXT: decl [[IV]]
; CHECK-NEXT: jne [[LOOP_LABEL]]
; Next BB.
; CHECK: shll $3, [[SUM]]
;
; DISABLE: jmp [[EPILOG_BB:LBB[0-9_]+]]
;
; DISABLE: [[ELSE_LABEL]]: ## %if.else
; Shift second argument by one and store into returned register.
; DISABLE: addl %esi, %esi
; DISABLE: [[EPILOG_BB]]: ## %if.end
;
; Epilogue code.
; CHECK-DAG: popq %rbx
; CHECK-DAG: movl %esi, %eax
; CHECK: retq
;
; ENABLE: [[ELSE_LABEL]]: ## %if.else
; Shift second argument by one and store into returned register.
; ENABLE: addl %esi, %esi
; ENABLE-NEXT: movl %esi, %eax
; ENABLE-NEXT: retq
define i32 @loopInfoRestoreOutsideLoop(i32 %cond, i32 %N) #0 {
entry:
%tobool = icmp eq i32 %cond, 0
br i1 %tobool, label %if.else, label %if.then
if.then: ; preds = %entry
tail call void asm "nop", "~{ebx}"()
br label %for.body
for.body: ; preds = %for.body, %if.then
%i.05 = phi i32 [ 0, %if.then ], [ %inc, %for.body ]
%sum.04 = phi i32 [ 0, %if.then ], [ %add, %for.body ]
%call = tail call i32 asm "movl $$1, $0", "=r,~{ebx}"()
%add = add nsw i32 %call, %sum.04
%inc = add nuw nsw i32 %i.05, 1
%exitcond = icmp eq i32 %inc, 10
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body
%shl = shl i32 %add, 3
br label %if.end
if.else: ; preds = %entry
%mul = shl nsw i32 %N, 1
br label %if.end
if.end: ; preds = %if.else, %for.end
%sum.1 = phi i32 [ %shl, %for.end ], [ %mul, %if.else ]
ret i32 %sum.1
}
; Check that we handle functions with no frame information correctly.
; CHECK-LABEL: emptyFrame:
; CHECK: ## %entry
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: retq
define i32 @emptyFrame() {
entry:
ret i32 0
}
; Check that we handle inline asm correctly.
; CHECK-LABEL: inlineAsm:
;
; ENABLE: testl %edi, %edi
; ENABLE-NEXT: je [[ELSE_LABEL:LBB[0-9_]+]]
;
; Prologue code.
; Make sure we save the CSR used in the inline asm: rbx.
; CHECK: pushq %rbx
;
; DISABLE: testl %edi, %edi
; DISABLE-NEXT: je [[ELSE_LABEL:LBB[0-9_]+]]
;
; CHECK: nop
; CHECK: movl $10, [[IV:%e[a-z]+]]
;
; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ## %for.body
; Inline asm statement.
; CHECK: addl $1, %ebx
; CHECK: decl [[IV]]
; CHECK-NEXT: jne [[LOOP_LABEL]]
; Next BB.
; CHECK: nop
; CHECK: xorl %esi, %esi
;
; DISABLE: jmp [[EPILOG_BB:LBB[0-9_]+]]
;
; DISABLE: [[ELSE_LABEL]]: ## %if.else
; Shift second argument by one and store into returned register.
; DISABLE: addl %esi, %esi
; DISABLE: [[EPILOG_BB]]: ## %if.end
;
; Epilogue code.
; CHECK-DAG: popq %rbx
; CHECK-DAG: movl %esi, %eax
; CHECK: retq
;
; ENABLE: [[ELSE_LABEL]]: ## %if.else
; Shift second argument by one and store into returned register.
; ENABLE: addl %esi, %esi
; ENABLE-NEXT: movl %esi, %eax
; ENABLE-NEXT: retq
define i32 @inlineAsm(i32 %cond, i32 %N) {
entry:
%tobool = icmp eq i32 %cond, 0
br i1 %tobool, label %if.else, label %for.preheader
for.preheader:
tail call void asm "nop", ""()
br label %for.body
for.body: ; preds = %entry, %for.body
%i.03 = phi i32 [ %inc, %for.body ], [ 0, %for.preheader ]
tail call void asm "addl $$1, %ebx", "~{ebx}"()
%inc = add nuw nsw i32 %i.03, 1
%exitcond = icmp eq i32 %inc, 10
br i1 %exitcond, label %for.exit, label %for.body
for.exit:
tail call void asm "nop", ""()
br label %if.end
if.else: ; preds = %entry
%mul = shl nsw i32 %N, 1
br label %if.end
if.end: ; preds = %for.body, %if.else
%sum.0 = phi i32 [ %mul, %if.else ], [ 0, %for.exit ]
ret i32 %sum.0
}
; Check that we handle calls to variadic functions correctly.
; CHECK-LABEL: callVariadicFunc:
;
; ENABLE: testl %edi, %edi
; ENABLE-NEXT: je [[ELSE_LABEL:LBB[0-9_]+]]
;
; Prologue code.
; CHECK: pushq
;
; DISABLE: testl %edi, %edi
; DISABLE-NEXT: je [[ELSE_LABEL:LBB[0-9_]+]]
;
; Setup of the varargs.
; CHECK: movl %esi, (%rsp)
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: %esi, %edi
; CHECK-NEXT: %esi, %edx
; CHECK-NEXT: %esi, %r8d
; CHECK-NEXT: %esi, %r9d
; CHECK-NEXT: %esi, %ecx
; CHECK-NEXT: callq _someVariadicFunc
; CHECK-NEXT: movl %eax, %esi
; CHECK-NEXT: shll $3, %esi
;
; ENABLE-NEXT: addq $8, %rsp
; ENABLE-NEXT: movl %esi, %eax
; ENABLE-NEXT: retq
;
; DISABLE: jmp [[IFEND_LABEL:LBB[0-9_]+]]
;
; CHECK: [[ELSE_LABEL]]: ## %if.else
; Shift second argument by one and store into returned register.
; CHECK: addl %esi, %esi
;
; DISABLE: [[IFEND_LABEL]]: ## %if.end
;
; Epilogue code.
; CHECK-NEXT: movl %esi, %eax
; DISABLE-NEXT: popq
; CHECK-NEXT: retq
define i32 @callVariadicFunc(i32 %cond, i32 %N) {
entry:
%tobool = icmp eq i32 %cond, 0
br i1 %tobool, label %if.else, label %if.then
if.then: ; preds = %entry
%call = tail call i32 (i32, ...) @someVariadicFunc(i32 %N, i32 %N, i32 %N, i32 %N, i32 %N, i32 %N, i32 %N)
%shl = shl i32 %call, 3
br label %if.end
if.else: ; preds = %entry
%mul = shl nsw i32 %N, 1
br label %if.end
if.end: ; preds = %if.else, %if.then
%sum.0 = phi i32 [ %shl, %if.then ], [ %mul, %if.else ]
ret i32 %sum.0
}
declare i32 @someVariadicFunc(i32, ...)
; Check that we use LEA so as not to clobber EFLAGS.
%struct.temp_slot = type { %struct.temp_slot*, %struct.rtx_def*, %struct.rtx_def*, i32, i64, %union.tree_node*, %union.tree_node*, i8, i8, i32, i32, i64, i64 }
%union.tree_node = type { %struct.tree_decl }
%struct.tree_decl = type { %struct.tree_common, i8*, i32, i32, %union.tree_node*, i48, %union.anon, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %struct.rtx_def*, %struct.rtx_def*, %union.anon.1, %union.tree_node*, %union.tree_node*, %union.tree_node*, i64, %struct.lang_decl* }
%struct.tree_common = type { %union.tree_node*, %union.tree_node*, i32 }
%union.anon = type { i64 }
%union.anon.1 = type { %struct.function* }
%struct.function = type { %struct.eh_status*, %struct.stmt_status*, %struct.expr_status*, %struct.emit_status*, %struct.varasm_status*, i8*, %union.tree_node*, %struct.function*, i32, i32, i32, i32, %struct.rtx_def*, %struct.ix86_args, %struct.rtx_def*, %struct.rtx_def*, i8*, %struct.initial_value_struct*, i32, %union.tree_node*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %union.tree_node*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, i64, %union.tree_node*, %union.tree_node*, %struct.rtx_def*, %struct.rtx_def*, i32, %struct.rtx_def**, %struct.temp_slot*, i32, i32, i32, %struct.var_refs_queue*, i32, i32, i8*, %union.tree_node*, %struct.rtx_def*, i32, i32, %struct.machine_function*, i32, i32, %struct.language_function*, %struct.rtx_def*, i24 }
%struct.eh_status = type opaque
%struct.stmt_status = type opaque
%struct.expr_status = type { i32, i32, i32, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def* }
%struct.emit_status = type { i32, i32, %struct.rtx_def*, %struct.rtx_def*, %union.tree_node*, %struct.sequence_stack*, i32, i32, i8*, i32, i8*, %union.tree_node**, %struct.rtx_def** }
%struct.sequence_stack = type { %struct.rtx_def*, %struct.rtx_def*, %union.tree_node*, %struct.sequence_stack* }
%struct.varasm_status = type opaque
%struct.ix86_args = type { i32, i32, i32, i32, i32, i32, i32 }
%struct.initial_value_struct = type opaque
%struct.var_refs_queue = type { %struct.rtx_def*, i32, i32, %struct.var_refs_queue* }
%struct.machine_function = type opaque
%struct.language_function = type opaque
%struct.lang_decl = type opaque
%struct.rtx_def = type { i32, [1 x %union.rtunion_def] }
%union.rtunion_def = type { i64 }
declare hidden fastcc %struct.temp_slot* @find_temp_slot_from_address(%struct.rtx_def* readonly)
; CHECK-LABEL: useLEA:
; DISABLE: pushq
;
; CHECK: testq %rdi, %rdi
; CHECK-NEXT: je [[CLEANUP:LBB[0-9_]+]]
;
; CHECK: movzwl (%rdi), [[BF_LOAD:%e[a-z]+]]
; CHECK-NEXT: cmpl $66, [[BF_LOAD]]
; CHECK-NEXT: jne [[CLEANUP]]
;
; CHECK: movq 8(%rdi), %rdi
; CHECK-NEXT: movzwl (%rdi), %e[[BF_LOAD2:[a-z]+]]
; CHECK-NEXT: leal -54(%r[[BF_LOAD2]]), [[TMP:%e[a-z]+]]
; CHECK-NEXT: cmpl $14, [[TMP]]
; CHECK-NEXT: ja [[LOR_LHS_FALSE:LBB[0-9_]+]]
;
; CHECK: movl $24599, [[TMP2:%e[a-z]+]]
; CHECK-NEXT: btl [[TMP]], [[TMP2]]
; CHECK-NEXT: jb [[CLEANUP]]
;
; CHECK: [[LOR_LHS_FALSE]]: ## %lor.lhs.false
; CHECK: cmpl $134, %e[[BF_LOAD2]]
; CHECK-NEXT: je [[CLEANUP]]
;
; CHECK: cmpl $140, %e[[BF_LOAD2]]
; CHECK-NEXT: je [[CLEANUP]]
;
; ENABLE: pushq
; CHECK: callq _find_temp_slot_from_address
; CHECK-NEXT: testq %rax, %rax
;
; The adjustment must use LEA here (or be moved above the test).
; ENABLE-NEXT: leaq 8(%rsp), %rsp
;
; CHECK-NEXT: je [[CLEANUP]]
;
; CHECK: movb $1, 57(%rax)
;
; CHECK: [[CLEANUP]]: ## %cleanup
; DISABLE: popq
; CHECK-NEXT: retq
define void @useLEA(%struct.rtx_def* readonly %x) {
entry:
%cmp = icmp eq %struct.rtx_def* %x, null
br i1 %cmp, label %cleanup, label %if.end
if.end: ; preds = %entry
%tmp = getelementptr inbounds %struct.rtx_def, %struct.rtx_def* %x, i64 0, i32 0
%bf.load = load i32, i32* %tmp, align 8
%bf.clear = and i32 %bf.load, 65535
%cmp1 = icmp eq i32 %bf.clear, 66
br i1 %cmp1, label %lor.lhs.false, label %cleanup
lor.lhs.false: ; preds = %if.end
%arrayidx = getelementptr inbounds %struct.rtx_def, %struct.rtx_def* %x, i64 0, i32 1, i64 0
%rtx = bitcast %union.rtunion_def* %arrayidx to %struct.rtx_def**
%tmp1 = load %struct.rtx_def*, %struct.rtx_def** %rtx, align 8
%tmp2 = getelementptr inbounds %struct.rtx_def, %struct.rtx_def* %tmp1, i64 0, i32 0
%bf.load2 = load i32, i32* %tmp2, align 8
%bf.clear3 = and i32 %bf.load2, 65535
switch i32 %bf.clear3, label %if.end.55 [
i32 67, label %cleanup
i32 68, label %cleanup
i32 54, label %cleanup
i32 55, label %cleanup
i32 58, label %cleanup
i32 134, label %cleanup
i32 56, label %cleanup
i32 140, label %cleanup
]
if.end.55: ; preds = %lor.lhs.false
%call = tail call fastcc %struct.temp_slot* @find_temp_slot_from_address(%struct.rtx_def* %tmp1) #2
%cmp59 = icmp eq %struct.temp_slot* %call, null
br i1 %cmp59, label %cleanup, label %if.then.60
if.then.60: ; preds = %if.end.55
%addr_taken = getelementptr inbounds %struct.temp_slot, %struct.temp_slot* %call, i64 0, i32 8
store i8 1, i8* %addr_taken, align 1
br label %cleanup
cleanup: ; preds = %if.then.60, %if.end.55, %lor.lhs.false, %lor.lhs.false, %lor.lhs.false, %lor.lhs.false, %lor.lhs.false, %lor.lhs.false, %lor.lhs.false, %lor.lhs.false, %if.end, %entry
ret void
}