mirror of
https://github.com/RPCSX/llvm.git
synced 2024-12-13 23:18:51 +00:00
[X86] Implement the support for shrink-wrapping.
With this patch the x86 backend is now shrink-wrapping capable and this functionality can be tested by using the -enable-shrink-wrap switch. The next step is to add more tests and enable shrink-wrapping by default for x86. Related to <rdar://problem/20821487> git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@238293 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
bdfc11784b
commit
60c91c28e4
@ -88,8 +88,9 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
|
||||
// standard x86_64 and NaCl use 64-bit frame/stack pointers, x32 - 32-bit.
|
||||
const bool Uses64BitFramePtr =
|
||||
STI->isTarget64BitLP64() || STI->isTargetNaCl64();
|
||||
bool UseLEAForSP =
|
||||
X86FL->useLEAForSPInProlog(*MBB.getParent());
|
||||
// Check if we should use LEA for SP.
|
||||
bool UseLEAForSP = STI->useLeaForSP() &&
|
||||
X86FL->canUseLEAForSPInEpilogue(*MBB.getParent());
|
||||
unsigned StackPtr = TRI->getStackRegister();
|
||||
// Check for possible merge with preceding ADD instruction.
|
||||
StackAdj += X86FrameLowering::mergeSPUpdates(MBB, MBBI, StackPtr, true);
|
||||
|
@ -565,7 +565,6 @@ static uint64_t calculateMaxStackAlign(const MachineFunction &MF) {
|
||||
|
||||
void X86FrameLowering::emitPrologue(MachineFunction &MF,
|
||||
MachineBasicBlock &MBB) const {
|
||||
assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
|
||||
MachineBasicBlock::iterator MBBI = MBB.begin();
|
||||
MachineFrameInfo *MFI = MF.getFrameInfo();
|
||||
const Function *Fn = MF.getFunction();
|
||||
@ -965,15 +964,38 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
|
||||
}
|
||||
}
|
||||
|
||||
bool X86FrameLowering::useLEAForSPInProlog(const MachineFunction &MF) const {
|
||||
bool X86FrameLowering::canUseLEAForSPInEpilogue(
|
||||
const MachineFunction &MF) const {
|
||||
// We can't use LEA instructions for adjusting the stack pointer if this is a
|
||||
// leaf function in the Win64 ABI. Only ADD instructions may be used to
|
||||
// deallocate the stack.
|
||||
// This means that we can use LEA for SP in two situations:
|
||||
// 1. We *aren't* using the Win64 ABI which means we are free to use LEA.
|
||||
// 2. We *have* a frame pointer which means we are permitted to use LEA.
|
||||
return MF.getSubtarget<X86Subtarget>().useLeaForSP() &&
|
||||
(!MF.getTarget().getMCAsmInfo()->usesWindowsCFI() || hasFP(MF));
|
||||
return !MF.getTarget().getMCAsmInfo()->usesWindowsCFI() || hasFP(MF);
|
||||
}
|
||||
|
||||
/// Check whether or not the terminators of \p MBB need to read EFLAGS.
|
||||
static bool terminatorsNeedFlagsAsInput(const MachineBasicBlock &MBB) {
|
||||
for (const MachineInstr &MI : MBB.terminators()) {
|
||||
bool BreakNext = false;
|
||||
for (const MachineOperand &MO : MI.operands()) {
|
||||
if (!MO.isReg())
|
||||
continue;
|
||||
unsigned Reg = MO.getReg();
|
||||
if (Reg != X86::EFLAGS)
|
||||
continue;
|
||||
|
||||
// This terminator needs an eflag that is not defined
|
||||
// by a previous terminator.
|
||||
if (!MO.isDef())
|
||||
return true;
|
||||
BreakNext = true;
|
||||
}
|
||||
if (BreakNext)
|
||||
break;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
void X86FrameLowering::emitEpilogue(MachineFunction &MF,
|
||||
@ -983,9 +1005,10 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
|
||||
const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
|
||||
const X86RegisterInfo *RegInfo = STI.getRegisterInfo();
|
||||
const TargetInstrInfo &TII = *STI.getInstrInfo();
|
||||
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
|
||||
assert(MBBI != MBB.end() && "Returning block has no instructions");
|
||||
DebugLoc DL = MBBI->getDebugLoc();
|
||||
MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
|
||||
DebugLoc DL;
|
||||
if (MBBI != MBB.end())
|
||||
DL = MBBI->getDebugLoc();
|
||||
bool Is64Bit = STI.is64Bit();
|
||||
// standard x86_64 and NaCl use 64-bit frame/stack pointers, x32 - 32-bit.
|
||||
const bool Uses64BitFramePtr = STI.isTarget64BitLP64() || STI.isTargetNaCl64();
|
||||
@ -999,25 +1022,18 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
|
||||
|
||||
bool IsWinEH = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
|
||||
bool NeedsWinEH = IsWinEH && MF.getFunction()->needsUnwindTableEntry();
|
||||
bool UseLEAForSP = useLEAForSPInProlog(MF);
|
||||
|
||||
switch (MBBI->getOpcode()) {
|
||||
default:
|
||||
llvm_unreachable("Can only insert epilogue into returning blocks");
|
||||
case X86::RETQ:
|
||||
case X86::RETL:
|
||||
case X86::RETIL:
|
||||
case X86::RETIQ:
|
||||
case X86::TCRETURNdi:
|
||||
case X86::TCRETURNri:
|
||||
case X86::TCRETURNmi:
|
||||
case X86::TCRETURNdi64:
|
||||
case X86::TCRETURNri64:
|
||||
case X86::TCRETURNmi64:
|
||||
case X86::EH_RETURN:
|
||||
case X86::EH_RETURN64:
|
||||
break; // These are ok
|
||||
}
|
||||
bool UseLEAForSP = canUseLEAForSPInEpilogue(MF);
|
||||
// If we can use LEA for SP but we shouldn't, check that none
|
||||
// of the terminators uses the eflags. Otherwise we will insert
|
||||
// an ADD that will redefine the eflags and break the condition.
|
||||
// Alternatively, we could move the ADD, but this may not be possible
|
||||
// and is an optimization anyway.
|
||||
if (UseLEAForSP && !MF.getSubtarget<X86Subtarget>().useLeaForSP())
|
||||
UseLEAForSP = terminatorsNeedFlagsAsInput(MBB);
|
||||
// If that assert breaks, that means we do not do the right thing
|
||||
// in canUseAsEpilogue.
|
||||
assert((UseLEAForSP || !terminatorsNeedFlagsAsInput(MBB)) &&
|
||||
"We shouldn't have allowed this insertion point");
|
||||
|
||||
// Get the number of bytes to allocate from the FrameInfo.
|
||||
uint64_t StackSize = MFI->getStackSize();
|
||||
@ -1056,7 +1072,8 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
|
||||
}
|
||||
MachineBasicBlock::iterator FirstCSPop = MBBI;
|
||||
|
||||
DL = MBBI->getDebugLoc();
|
||||
if (MBBI != MBB.end())
|
||||
DL = MBBI->getDebugLoc();
|
||||
|
||||
// If there is an ADD32ri or SUB32ri of ESP immediately before this
|
||||
// instruction, merge the two instructions.
|
||||
@ -1514,8 +1531,6 @@ static const uint64_t kSplitStackAvailable = 256;
|
||||
|
||||
void X86FrameLowering::adjustForSegmentedStacks(
|
||||
MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
|
||||
assert(&PrologueMBB == &MF.front() &&
|
||||
"Shrink-wrapping is not implemented yet");
|
||||
MachineFrameInfo *MFI = MF.getFrameInfo();
|
||||
const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
|
||||
const TargetInstrInfo &TII = *STI.getInstrInfo();
|
||||
@ -1835,8 +1850,6 @@ void X86FrameLowering::adjustForHiPEPrologue(
|
||||
// If the stack frame needed is larger than the guaranteed then runtime checks
|
||||
// and calls to "inc_stack_0" BIF should be inserted in the assembly prologue.
|
||||
if (MaxStack > Guaranteed) {
|
||||
assert(&PrologueMBB == &MF.front() &&
|
||||
"Shrink-wrapping is not implemented yet");
|
||||
MachineBasicBlock *stackCheckMBB = MF.CreateMachineBasicBlock();
|
||||
MachineBasicBlock *incStackMBB = MF.CreateMachineBasicBlock();
|
||||
|
||||
@ -1979,3 +1992,15 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
|
||||
}
|
||||
}
|
||||
|
||||
bool X86FrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
|
||||
assert(MBB.getParent() && "Block is not attached to a function!");
|
||||
|
||||
if (canUseLEAForSPInEpilogue(*MBB.getParent()))
|
||||
return true;
|
||||
|
||||
// If we cannot use LEA to adjust SP, we may need to use ADD, which
|
||||
// clobbers the EFLAGS. Check that none of the terminators reads the
|
||||
// EFLAGS, and if one uses it, conservatively assume this is not
|
||||
// safe to insert the epilogue here.
|
||||
return !terminatorsNeedFlagsAsInput(MBB);
|
||||
}
|
||||
|
@ -96,8 +96,15 @@ public:
|
||||
const TargetInstrInfo &TII,
|
||||
const TargetRegisterInfo &TRI);
|
||||
|
||||
/// Check that LEA can be use on SP in a prologue sequence for \p MF.
|
||||
bool useLEAForSPInProlog(const MachineFunction &MF) const;
|
||||
/// Check that LEA can be used on SP in an epilogue sequence for \p MF.
|
||||
bool canUseLEAForSPInEpilogue(const MachineFunction &MF) const;
|
||||
|
||||
/// Check whether or not the given \p MBB can be used as an epilogue
|
||||
/// for the target.
|
||||
/// The epilogue will be inserted before the first terminator of that block.
|
||||
/// This method is used by the shrink-wrapping pass to decide if
|
||||
/// \p MBB will be correctly handled by the target.
|
||||
bool canUseAsEpilogue(const MachineBasicBlock &MBB) const override;
|
||||
|
||||
private:
|
||||
/// convertArgMovsToPushes - This method tries to convert a call sequence
|
||||
|
600
test/CodeGen/X86/x86-shrink-wrapping.ll
Normal file
600
test/CodeGen/X86/x86-shrink-wrapping.ll
Normal file
@ -0,0 +1,600 @@
|
||||
; RUN: llc %s -o - -enable-shrink-wrap=true | FileCheck %s --check-prefix=CHECK --check-prefix=ENABLE
|
||||
; RUN: llc %s -o - -enable-shrink-wrap=false | FileCheck %s --check-prefix=CHECK --check-prefix=DISABLE
|
||||
;
|
||||
; Note: Lots of tests use inline asm instead of regular calls.
|
||||
; This allows better control over what the allocation will do.
|
||||
; Otherwise, we may have spill right in the entry block, defeating
|
||||
; shrink-wrapping. Moreover, some of the inline asm statements (nop)
|
||||
; are here to ensure that the related paths do not end up as critical
|
||||
; edges.
|
||||
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
|
||||
target triple = "x86_64-apple-macosx"
|
||||
|
||||
|
||||
; Initial motivating example: Simple diamond with a call just on one side.
|
||||
; CHECK-LABEL: foo:
|
||||
;
|
||||
; Compare the arguments and jump to exit.
|
||||
; No prologue needed.
|
||||
; ENABLE: movl %edi, [[ARG0CPY:%e[a-z]+]]
|
||||
; ENABLE-NEXT: cmpl %esi, [[ARG0CPY]]
|
||||
; ENABLE-NEXT: jge [[EXIT_LABEL:LBB[0-9_]+]]
|
||||
;
|
||||
; Prologue code.
|
||||
; (What we push does not matter. It should be some random scratch register.)
|
||||
; CHECK: pushq
|
||||
;
|
||||
; Compare the arguments and jump to exit.
|
||||
; After the prologue is set.
|
||||
; DISABLE: movl %edi, [[ARG0CPY:%e[a-z]+]]
|
||||
; DISABLE-NEXT: cmpl %esi, [[ARG0CPY]]
|
||||
; DISABLE-NEXT: jge [[EXIT_LABEL:LBB[0-9_]+]]
|
||||
;
|
||||
; Store %a in the alloca.
|
||||
; CHECK: movl [[ARG0CPY]], 4(%rsp)
|
||||
; Set the alloca address in the second argument.
|
||||
; CHECK-NEXT: leaq 4(%rsp), %rsi
|
||||
; Set the first argument to zero.
|
||||
; CHECK-NEXT: xorl %edi, %edi
|
||||
; CHECK-NEXT: callq _doSomething
|
||||
;
|
||||
; With shrink-wrapping, epilogue is just after the call.
|
||||
; ENABLE-NEXT: addq $8, %rsp
|
||||
;
|
||||
; CHECK: [[EXIT_LABEL]]:
|
||||
;
|
||||
; Without shrink-wrapping, epilogue is in the exit block.
|
||||
; Epilogue code. (What we pop does not matter.)
|
||||
; DISABLE-NEXT: popq
|
||||
;
|
||||
; CHECK-NEXT: retq
|
||||
define i32 @foo(i32 %a, i32 %b) {
|
||||
%tmp = alloca i32, align 4
|
||||
%tmp2 = icmp slt i32 %a, %b
|
||||
br i1 %tmp2, label %true, label %false
|
||||
|
||||
true:
|
||||
store i32 %a, i32* %tmp, align 4
|
||||
%tmp4 = call i32 @doSomething(i32 0, i32* %tmp)
|
||||
br label %false
|
||||
|
||||
false:
|
||||
%tmp.0 = phi i32 [ %tmp4, %true ], [ %a, %0 ]
|
||||
ret i32 %tmp.0
|
||||
}
|
||||
|
||||
; Function Attrs: optsize
|
||||
declare i32 @doSomething(i32, i32*)
|
||||
|
||||
|
||||
; Check that we do not perform the restore inside the loop whereas the save
|
||||
; is outside.
|
||||
; CHECK-LABEL: freqSaveAndRestoreOutsideLoop:
|
||||
;
|
||||
; Shrink-wrapping allows skipping the prologue in the else case.
|
||||
; ENABLE: testl %edi, %edi
|
||||
; ENABLE: je [[ELSE_LABEL:LBB[0-9_]+]]
|
||||
;
|
||||
; Prologue code.
|
||||
; Make sure we save the CSR used in the inline asm: rbx.
|
||||
; CHECK: pushq %rbx
|
||||
;
|
||||
; DISABLE: testl %edi, %edi
|
||||
; DISABLE: je [[ELSE_LABEL:LBB[0-9_]+]]
|
||||
;
|
||||
; SUM is in %esi because it is coalesced with the second
|
||||
; argument on the else path.
|
||||
; CHECK: xorl [[SUM:%esi]], [[SUM]]
|
||||
; CHECK-NEXT: movl $10, [[IV:%e[a-z]+]]
|
||||
;
|
||||
; Next BB.
|
||||
; CHECK: [[LOOP:LBB[0-9_]+]]: ## %for.body
|
||||
; CHECK: movl $1, [[TMP:%e[a-z]+]]
|
||||
; CHECK: addl [[TMP]], [[SUM]]
|
||||
; CHECK-NEXT: decl [[IV]]
|
||||
; CHECK-NEXT: jne [[LOOP]]
|
||||
;
|
||||
; Next BB.
|
||||
; SUM << 3.
|
||||
; CHECK: shll $3, [[SUM]]
|
||||
;
|
||||
; Jump to epilogue.
|
||||
; DISABLE: jmp [[EPILOG_BB:LBB[0-9_]+]]
|
||||
;
|
||||
; DISABLE: [[ELSE_LABEL]]: ## %if.else
|
||||
; Shift second argument by one and store into returned register.
|
||||
; DISABLE: addl %esi, %esi
|
||||
; DISABLE: [[EPILOG_BB]]: ## %if.end
|
||||
;
|
||||
; Epilogue code.
|
||||
; CHECK-DAG: popq %rbx
|
||||
; CHECK-DAG: movl %esi, %eax
|
||||
; CHECK: retq
|
||||
;
|
||||
; ENABLE: [[ELSE_LABEL]]: ## %if.else
|
||||
; Shift second argument by one and store into returned register.
|
||||
; ENABLE: addl %esi, %esi
|
||||
; ENABLE-NEXT: movl %esi, %eax
|
||||
; ENABLE-NEXT: retq
|
||||
define i32 @freqSaveAndRestoreOutsideLoop(i32 %cond, i32 %N) {
|
||||
entry:
|
||||
%tobool = icmp eq i32 %cond, 0
|
||||
br i1 %tobool, label %if.else, label %for.preheader
|
||||
|
||||
for.preheader:
|
||||
tail call void asm "nop", ""()
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %entry, %for.body
|
||||
%i.05 = phi i32 [ %inc, %for.body ], [ 0, %for.preheader ]
|
||||
%sum.04 = phi i32 [ %add, %for.body ], [ 0, %for.preheader ]
|
||||
%call = tail call i32 asm "movl $$1, $0", "=r,~{ebx}"()
|
||||
%add = add nsw i32 %call, %sum.04
|
||||
%inc = add nuw nsw i32 %i.05, 1
|
||||
%exitcond = icmp eq i32 %inc, 10
|
||||
br i1 %exitcond, label %for.end, label %for.body
|
||||
|
||||
for.end: ; preds = %for.body
|
||||
%shl = shl i32 %add, 3
|
||||
br label %if.end
|
||||
|
||||
if.else: ; preds = %entry
|
||||
%mul = shl nsw i32 %N, 1
|
||||
br label %if.end
|
||||
|
||||
if.end: ; preds = %if.else, %for.end
|
||||
%sum.1 = phi i32 [ %shl, %for.end ], [ %mul, %if.else ]
|
||||
ret i32 %sum.1
|
||||
}
|
||||
|
||||
declare i32 @something(...)
|
||||
|
||||
; Check that we do not perform the shrink-wrapping inside the loop even
|
||||
; though that would be legal. The cost model must prevent that.
|
||||
; CHECK-LABEL: freqSaveAndRestoreOutsideLoop2:
|
||||
; Prologue code.
|
||||
; Make sure we save the CSR used in the inline asm: rbx.
|
||||
; CHECK: pushq %rbx
|
||||
; CHECK: nop
|
||||
; CHECK: xorl [[SUM:%e[a-z]+]], [[SUM]]
|
||||
; CHECK-NEXT: movl $10, [[IV:%e[a-z]+]]
|
||||
; Next BB.
|
||||
; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ## %for.body
|
||||
; CHECK: movl $1, [[TMP:%e[a-z]+]]
|
||||
; CHECK: addl [[TMP]], [[SUM]]
|
||||
; CHECK-NEXT: decl [[IV]]
|
||||
; CHECK-NEXT: jne [[LOOP_LABEL]]
|
||||
; Next BB.
|
||||
; CHECK: ## %for.exit
|
||||
; CHECK: nop
|
||||
; CHECK: popq %rbx
|
||||
; CHECK-NEXT: retq
|
||||
define i32 @freqSaveAndRestoreOutsideLoop2(i32 %cond) {
|
||||
entry:
|
||||
br label %for.preheader
|
||||
|
||||
for.preheader:
|
||||
tail call void asm "nop", ""()
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %for.body, %entry
|
||||
%i.04 = phi i32 [ 0, %for.preheader ], [ %inc, %for.body ]
|
||||
%sum.03 = phi i32 [ 0, %for.preheader ], [ %add, %for.body ]
|
||||
%call = tail call i32 asm "movl $$1, $0", "=r,~{ebx}"()
|
||||
%add = add nsw i32 %call, %sum.03
|
||||
%inc = add nuw nsw i32 %i.04, 1
|
||||
%exitcond = icmp eq i32 %inc, 10
|
||||
br i1 %exitcond, label %for.exit, label %for.body
|
||||
|
||||
for.exit:
|
||||
tail call void asm "nop", ""()
|
||||
br label %for.end
|
||||
|
||||
for.end: ; preds = %for.body
|
||||
ret i32 %add
|
||||
}
|
||||
|
||||
; Check with a more complex case that we do not have save within the loop and
|
||||
; restore outside.
|
||||
; CHECK-LABEL: loopInfoSaveOutsideLoop:
|
||||
;
|
||||
; ENABLE: testl %edi, %edi
|
||||
; ENABLE-NEXT: je [[ELSE_LABEL:LBB[0-9_]+]]
|
||||
;
|
||||
; Prologue code.
|
||||
; Make sure we save the CSR used in the inline asm: rbx.
|
||||
; CHECK: pushq %rbx
|
||||
;
|
||||
; DISABLE: testl %edi, %edi
|
||||
; DISABLE-NEXT: je [[ELSE_LABEL:LBB[0-9_]+]]
|
||||
;
|
||||
; CHECK: nop
|
||||
; CHECK: xorl [[SUM:%esi]], [[SUM]]
|
||||
; CHECK-NEXT: movl $10, [[IV:%e[a-z]+]]
|
||||
;
|
||||
; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ## %for.body
|
||||
; CHECK: movl $1, [[TMP:%e[a-z]+]]
|
||||
; CHECK: addl [[TMP]], [[SUM]]
|
||||
; CHECK-NEXT: decl [[IV]]
|
||||
; CHECK-NEXT: jne [[LOOP_LABEL]]
|
||||
; Next BB.
|
||||
; CHECK: nop
|
||||
; CHECK: shll $3, [[SUM]]
|
||||
;
|
||||
; DISABLE: jmp [[EPILOG_BB:LBB[0-9_]+]]
|
||||
;
|
||||
; DISABLE: [[ELSE_LABEL]]: ## %if.else
|
||||
; Shift second argument by one and store into returned register.
|
||||
; DISABLE: addl %esi, %esi
|
||||
; DISABLE: [[EPILOG_BB]]: ## %if.end
|
||||
;
|
||||
; Epilogue code.
|
||||
; CHECK-DAG: popq %rbx
|
||||
; CHECK-DAG: movl %esi, %eax
|
||||
; CHECK: retq
|
||||
;
|
||||
; ENABLE: [[ELSE_LABEL]]: ## %if.else
|
||||
; Shift second argument by one and store into returned register.
|
||||
; ENABLE: addl %esi, %esi
|
||||
; ENABLE-NEXT: movl %esi, %eax
|
||||
; ENABLE-NEXT: retq
|
||||
define i32 @loopInfoSaveOutsideLoop(i32 %cond, i32 %N) {
|
||||
entry:
|
||||
%tobool = icmp eq i32 %cond, 0
|
||||
br i1 %tobool, label %if.else, label %for.preheader
|
||||
|
||||
for.preheader:
|
||||
tail call void asm "nop", ""()
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %entry, %for.body
|
||||
%i.05 = phi i32 [ %inc, %for.body ], [ 0, %for.preheader ]
|
||||
%sum.04 = phi i32 [ %add, %for.body ], [ 0, %for.preheader ]
|
||||
%call = tail call i32 asm "movl $$1, $0", "=r,~{ebx}"()
|
||||
%add = add nsw i32 %call, %sum.04
|
||||
%inc = add nuw nsw i32 %i.05, 1
|
||||
%exitcond = icmp eq i32 %inc, 10
|
||||
br i1 %exitcond, label %for.end, label %for.body
|
||||
|
||||
for.end: ; preds = %for.body
|
||||
tail call void asm "nop", "~{ebx}"()
|
||||
%shl = shl i32 %add, 3
|
||||
br label %if.end
|
||||
|
||||
if.else: ; preds = %entry
|
||||
%mul = shl nsw i32 %N, 1
|
||||
br label %if.end
|
||||
|
||||
if.end: ; preds = %if.else, %for.end
|
||||
%sum.1 = phi i32 [ %shl, %for.end ], [ %mul, %if.else ]
|
||||
ret i32 %sum.1
|
||||
}
|
||||
|
||||
declare void @somethingElse(...)
|
||||
|
||||
; Check with a more complex case that we do not have restore within the loop and
|
||||
; save outside.
|
||||
; CHECK-LABEL: loopInfoRestoreOutsideLoop:
|
||||
;
|
||||
; ENABLE: testl %edi, %edi
|
||||
; ENABLE-NEXT: je [[ELSE_LABEL:LBB[0-9_]+]]
|
||||
;
|
||||
; Prologue code.
|
||||
; Make sure we save the CSR used in the inline asm: rbx.
|
||||
; CHECK: pushq %rbx
|
||||
;
|
||||
; DISABLE: testl %edi, %edi
|
||||
; DISABLE-NEXT: je [[ELSE_LABEL:LBB[0-9_]+]]
|
||||
;
|
||||
; CHECK: nop
|
||||
; CHECK: xorl [[SUM:%esi]], [[SUM]]
|
||||
; CHECK-NEXT: movl $10, [[IV:%e[a-z]+]]
|
||||
;
|
||||
; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ## %for.body
|
||||
; CHECK: movl $1, [[TMP:%e[a-z]+]]
|
||||
; CHECK: addl [[TMP]], [[SUM]]
|
||||
; CHECK-NEXT: decl [[IV]]
|
||||
; CHECK-NEXT: jne [[LOOP_LABEL]]
|
||||
; Next BB.
|
||||
; CHECK: shll $3, [[SUM]]
|
||||
;
|
||||
; DISABLE: jmp [[EPILOG_BB:LBB[0-9_]+]]
|
||||
;
|
||||
; DISABLE: [[ELSE_LABEL]]: ## %if.else
|
||||
|
||||
; Shift second argument by one and store into returned register.
|
||||
; DISABLE: addl %esi, %esi
|
||||
; DISABLE: [[EPILOG_BB]]: ## %if.end
|
||||
;
|
||||
; Epilogue code.
|
||||
; CHECK-DAG: popq %rbx
|
||||
; CHECK-DAG: movl %esi, %eax
|
||||
; CHECK: retq
|
||||
;
|
||||
; ENABLE: [[ELSE_LABEL]]: ## %if.else
|
||||
; Shift second argument by one and store into returned register.
|
||||
; ENABLE: addl %esi, %esi
|
||||
; ENABLE-NEXT: movl %esi, %eax
|
||||
; ENABLE-NEXT: retq
|
||||
define i32 @loopInfoRestoreOutsideLoop(i32 %cond, i32 %N) #0 {
|
||||
entry:
|
||||
%tobool = icmp eq i32 %cond, 0
|
||||
br i1 %tobool, label %if.else, label %if.then
|
||||
|
||||
if.then: ; preds = %entry
|
||||
tail call void asm "nop", "~{ebx}"()
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %for.body, %if.then
|
||||
%i.05 = phi i32 [ 0, %if.then ], [ %inc, %for.body ]
|
||||
%sum.04 = phi i32 [ 0, %if.then ], [ %add, %for.body ]
|
||||
%call = tail call i32 asm "movl $$1, $0", "=r,~{ebx}"()
|
||||
%add = add nsw i32 %call, %sum.04
|
||||
%inc = add nuw nsw i32 %i.05, 1
|
||||
%exitcond = icmp eq i32 %inc, 10
|
||||
br i1 %exitcond, label %for.end, label %for.body
|
||||
|
||||
for.end: ; preds = %for.body
|
||||
%shl = shl i32 %add, 3
|
||||
br label %if.end
|
||||
|
||||
if.else: ; preds = %entry
|
||||
%mul = shl nsw i32 %N, 1
|
||||
br label %if.end
|
||||
|
||||
if.end: ; preds = %if.else, %for.end
|
||||
%sum.1 = phi i32 [ %shl, %for.end ], [ %mul, %if.else ]
|
||||
ret i32 %sum.1
|
||||
}
|
||||
|
||||
; Check that we handle function with no frame information correctly.
|
||||
; CHECK-LABEL: emptyFrame:
|
||||
; CHECK: ## %entry
|
||||
; CHECK-NEXT: xorl %eax, %eax
|
||||
; CHECK-NEXT: retq
|
||||
define i32 @emptyFrame() {
|
||||
entry:
|
||||
ret i32 0
|
||||
}
|
||||
|
||||
; Check that we handle inline asm correctly.
|
||||
; CHECK-LABEL: inlineAsm:
|
||||
;
|
||||
; ENABLE: testl %edi, %edi
|
||||
; ENABLE-NEXT: je [[ELSE_LABEL:LBB[0-9_]+]]
|
||||
;
|
||||
; Prologue code.
|
||||
; Make sure we save the CSR used in the inline asm: rbx.
|
||||
; CHECK: pushq %rbx
|
||||
;
|
||||
; DISABLE: testl %edi, %edi
|
||||
; DISABLE-NEXT: je [[ELSE_LABEL:LBB[0-9_]+]]
|
||||
;
|
||||
; CHECK: nop
|
||||
; CHECK: movl $10, [[IV:%e[a-z]+]]
|
||||
;
|
||||
; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ## %for.body
|
||||
; Inline asm statement.
|
||||
; CHECK: addl $1, %ebx
|
||||
; CHECK: decl [[IV]]
|
||||
; CHECK-NEXT: jne [[LOOP_LABEL]]
|
||||
; Next BB.
|
||||
; CHECK: nop
|
||||
; CHECK: xorl %esi, %esi
|
||||
;
|
||||
; DISABLE: jmp [[EPILOG_BB:LBB[0-9_]+]]
|
||||
;
|
||||
; DISABLE: [[ELSE_LABEL]]: ## %if.else
|
||||
; Shift second argument by one and store into returned register.
|
||||
; DISABLE: addl %esi, %esi
|
||||
; DISABLE: [[EPILOG_BB]]: ## %if.end
|
||||
;
|
||||
; Epilogue code.
|
||||
; CHECK-DAG: popq %rbx
|
||||
; CHECK-DAG: movl %esi, %eax
|
||||
; CHECK: retq
|
||||
;
|
||||
; ENABLE: [[ELSE_LABEL]]: ## %if.else
|
||||
; Shift second argument by one and store into returned register.
|
||||
; ENABLE: addl %esi, %esi
|
||||
; ENABLE-NEXT: movl %esi, %eax
|
||||
; ENABLE-NEXT: retq
|
||||
define i32 @inlineAsm(i32 %cond, i32 %N) {
|
||||
entry:
|
||||
%tobool = icmp eq i32 %cond, 0
|
||||
br i1 %tobool, label %if.else, label %for.preheader
|
||||
|
||||
for.preheader:
|
||||
tail call void asm "nop", ""()
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %entry, %for.body
|
||||
%i.03 = phi i32 [ %inc, %for.body ], [ 0, %for.preheader ]
|
||||
tail call void asm "addl $$1, %ebx", "~{ebx}"()
|
||||
%inc = add nuw nsw i32 %i.03, 1
|
||||
%exitcond = icmp eq i32 %inc, 10
|
||||
br i1 %exitcond, label %for.exit, label %for.body
|
||||
|
||||
for.exit:
|
||||
tail call void asm "nop", ""()
|
||||
br label %if.end
|
||||
|
||||
if.else: ; preds = %entry
|
||||
%mul = shl nsw i32 %N, 1
|
||||
br label %if.end
|
||||
|
||||
if.end: ; preds = %for.body, %if.else
|
||||
%sum.0 = phi i32 [ %mul, %if.else ], [ 0, %for.exit ]
|
||||
ret i32 %sum.0
|
||||
}
|
||||
|
||||
; Check that we handle calls to variadic functions correctly.
|
||||
; CHECK-LABEL: callVariadicFunc:
|
||||
;
|
||||
; ENABLE: testl %edi, %edi
|
||||
; ENABLE-NEXT: je [[ELSE_LABEL:LBB[0-9_]+]]
|
||||
;
|
||||
; Prologue code.
|
||||
; CHECK: pushq
|
||||
;
|
||||
; DISABLE: testl %edi, %edi
|
||||
; DISABLE-NEXT: je [[ELSE_LABEL:LBB[0-9_]+]]
|
||||
;
|
||||
; Setup of the varargs.
|
||||
; CHECK: movl %esi, (%rsp)
|
||||
; CHECK-NEXT: xorl %eax, %eax
|
||||
; CHECK-NEXT: %esi, %edi
|
||||
; CHECK-NEXT: %esi, %edx
|
||||
; CHECK-NEXT: %esi, %r8d
|
||||
; CHECK-NEXT: %esi, %r9d
|
||||
; CHECK-NEXT: %esi, %ecx
|
||||
; CHECK-NEXT: callq _someVariadicFunc
|
||||
; CHECK-NEXT: movl %eax, %esi
|
||||
; CHECK-NEXT: shll $3, %esi
|
||||
;
|
||||
; ENABLE-NEXT: addq $8, %rsp
|
||||
; ENABLE-NEXT: movl %esi, %eax
|
||||
; ENABLE-NEXT: retq
|
||||
;
|
||||
; DISABLE: jmp [[IFEND_LABEL:LBB[0-9_]+]]
|
||||
;
|
||||
; CHECK: [[ELSE_LABEL]]: ## %if.else
|
||||
; Shift second argument by one and store into returned register.
|
||||
; CHECK: addl %esi, %esi
|
||||
;
|
||||
; DISABLE: [[IFEND_LABEL]]: ## %if.end
|
||||
;
|
||||
; Epilogue code.
|
||||
; CHECK-NEXT: movl %esi, %eax
|
||||
; DISABLE-NEXT: popq
|
||||
; CHECK-NEXT: retq
|
||||
define i32 @callVariadicFunc(i32 %cond, i32 %N) {
|
||||
entry:
|
||||
%tobool = icmp eq i32 %cond, 0
|
||||
br i1 %tobool, label %if.else, label %if.then
|
||||
|
||||
if.then: ; preds = %entry
|
||||
%call = tail call i32 (i32, ...) @someVariadicFunc(i32 %N, i32 %N, i32 %N, i32 %N, i32 %N, i32 %N, i32 %N)
|
||||
%shl = shl i32 %call, 3
|
||||
br label %if.end
|
||||
|
||||
if.else: ; preds = %entry
|
||||
%mul = shl nsw i32 %N, 1
|
||||
br label %if.end
|
||||
|
||||
if.end: ; preds = %if.else, %if.then
|
||||
%sum.0 = phi i32 [ %shl, %if.then ], [ %mul, %if.else ]
|
||||
ret i32 %sum.0
|
||||
}
|
||||
|
||||
declare i32 @someVariadicFunc(i32, ...)
|
||||
|
||||
; Check that we use LEA not to clobber EFLAGS.
|
||||
%struct.temp_slot = type { %struct.temp_slot*, %struct.rtx_def*, %struct.rtx_def*, i32, i64, %union.tree_node*, %union.tree_node*, i8, i8, i32, i32, i64, i64 }
|
||||
%union.tree_node = type { %struct.tree_decl }
|
||||
%struct.tree_decl = type { %struct.tree_common, i8*, i32, i32, %union.tree_node*, i48, %union.anon, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %struct.rtx_def*, %struct.rtx_def*, %union.anon.1, %union.tree_node*, %union.tree_node*, %union.tree_node*, i64, %struct.lang_decl* }
|
||||
%struct.tree_common = type { %union.tree_node*, %union.tree_node*, i32 }
|
||||
%union.anon = type { i64 }
|
||||
%union.anon.1 = type { %struct.function* }
|
||||
%struct.function = type { %struct.eh_status*, %struct.stmt_status*, %struct.expr_status*, %struct.emit_status*, %struct.varasm_status*, i8*, %union.tree_node*, %struct.function*, i32, i32, i32, i32, %struct.rtx_def*, %struct.ix86_args, %struct.rtx_def*, %struct.rtx_def*, i8*, %struct.initial_value_struct*, i32, %union.tree_node*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %union.tree_node*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, i64, %union.tree_node*, %union.tree_node*, %struct.rtx_def*, %struct.rtx_def*, i32, %struct.rtx_def**, %struct.temp_slot*, i32, i32, i32, %struct.var_refs_queue*, i32, i32, i8*, %union.tree_node*, %struct.rtx_def*, i32, i32, %struct.machine_function*, i32, i32, %struct.language_function*, %struct.rtx_def*, i24 }
|
||||
%struct.eh_status = type opaque
|
||||
%struct.stmt_status = type opaque
|
||||
%struct.expr_status = type { i32, i32, i32, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def* }
|
||||
%struct.emit_status = type { i32, i32, %struct.rtx_def*, %struct.rtx_def*, %union.tree_node*, %struct.sequence_stack*, i32, i32, i8*, i32, i8*, %union.tree_node**, %struct.rtx_def** }
|
||||
%struct.sequence_stack = type { %struct.rtx_def*, %struct.rtx_def*, %union.tree_node*, %struct.sequence_stack* }
|
||||
%struct.varasm_status = type opaque
|
||||
%struct.ix86_args = type { i32, i32, i32, i32, i32, i32, i32 }
|
||||
%struct.initial_value_struct = type opaque
|
||||
%struct.var_refs_queue = type { %struct.rtx_def*, i32, i32, %struct.var_refs_queue* }
|
||||
%struct.machine_function = type opaque
|
||||
%struct.language_function = type opaque
|
||||
%struct.lang_decl = type opaque
|
||||
%struct.rtx_def = type { i32, [1 x %union.rtunion_def] }
|
||||
%union.rtunion_def = type { i64 }
|
||||
|
||||
declare hidden fastcc %struct.temp_slot* @find_temp_slot_from_address(%struct.rtx_def* readonly)
|
||||
|
||||
; CHECK-LABEL: useLEA:
|
||||
; DISABLE: pushq
|
||||
;
|
||||
; CHECK: testq %rdi, %rdi
|
||||
; CHECK-NEXT: je [[CLEANUP:LBB[0-9_]+]]
|
||||
;
|
||||
; CHECK: movzwl (%rdi), [[BF_LOAD:%e[a-z]+]]
|
||||
; CHECK-NEXT: cmpl $66, [[BF_LOAD]]
|
||||
; CHECK-NEXT: jne [[CLEANUP]]
|
||||
;
|
||||
; CHECK: movq 8(%rdi), %rdi
|
||||
; CHECK-NEXT: movzwl (%rdi), %e[[BF_LOAD2:[a-z]+]]
|
||||
; CHECK-NEXT: leal -54(%r[[BF_LOAD2]]), [[TMP:%e[a-z]+]]
|
||||
; CHECK-NEXT: cmpl $14, [[TMP]]
|
||||
; CHECK-NEXT: ja [[LOR_LHS_FALSE:LBB[0-9_]+]]
|
||||
;
|
||||
; CHECK: movl $24599, [[TMP2:%e[a-z]+]]
|
||||
; CHECK-NEXT: btl [[TMP]], [[TMP2]]
|
||||
; CHECK-NEXT: jb [[CLEANUP]]
|
||||
;
|
||||
; CHECK: [[LOR_LHS_FALSE]]: ## %lor.lhs.false
|
||||
; CHECK: cmpl $134, %e[[BF_LOAD2]]
|
||||
; CHECK-NEXT: je [[CLEANUP]]
|
||||
;
|
||||
; CHECK: cmpl $140, %e[[BF_LOAD2]]
|
||||
; CHECK-NEXT: je [[CLEANUP]]
|
||||
;
|
||||
; ENABLE: pushq
|
||||
; CHECK: callq _find_temp_slot_from_address
|
||||
; CHECK-NEXT: testq %rax, %rax
|
||||
;
|
||||
; The adjustment must use LEA here (or be moved above the test).
|
||||
; ENABLE-NEXT: leaq 8(%rsp), %rsp
|
||||
;
|
||||
; CHECK-NEXT: je [[CLEANUP]]
|
||||
;
|
||||
; CHECK: movb $1, 57(%rax)
|
||||
;
|
||||
; CHECK: [[CLEANUP]]: ## %cleanup
|
||||
; DISABLE: popq
|
||||
; CHECK-NEXT: retq
|
||||
define void @useLEA(%struct.rtx_def* readonly %x) {
|
||||
entry:
|
||||
%cmp = icmp eq %struct.rtx_def* %x, null
|
||||
br i1 %cmp, label %cleanup, label %if.end
|
||||
|
||||
if.end: ; preds = %entry
|
||||
%tmp = getelementptr inbounds %struct.rtx_def, %struct.rtx_def* %x, i64 0, i32 0
|
||||
%bf.load = load i32, i32* %tmp, align 8
|
||||
%bf.clear = and i32 %bf.load, 65535
|
||||
%cmp1 = icmp eq i32 %bf.clear, 66
|
||||
br i1 %cmp1, label %lor.lhs.false, label %cleanup
|
||||
|
||||
lor.lhs.false: ; preds = %if.end
|
||||
%arrayidx = getelementptr inbounds %struct.rtx_def, %struct.rtx_def* %x, i64 0, i32 1, i64 0
|
||||
%rtx = bitcast %union.rtunion_def* %arrayidx to %struct.rtx_def**
|
||||
%tmp1 = load %struct.rtx_def*, %struct.rtx_def** %rtx, align 8
|
||||
%tmp2 = getelementptr inbounds %struct.rtx_def, %struct.rtx_def* %tmp1, i64 0, i32 0
|
||||
%bf.load2 = load i32, i32* %tmp2, align 8
|
||||
%bf.clear3 = and i32 %bf.load2, 65535
|
||||
switch i32 %bf.clear3, label %if.end.55 [
|
||||
i32 67, label %cleanup
|
||||
i32 68, label %cleanup
|
||||
i32 54, label %cleanup
|
||||
i32 55, label %cleanup
|
||||
i32 58, label %cleanup
|
||||
i32 134, label %cleanup
|
||||
i32 56, label %cleanup
|
||||
i32 140, label %cleanup
|
||||
]
|
||||
|
||||
if.end.55: ; preds = %lor.lhs.false
|
||||
%call = tail call fastcc %struct.temp_slot* @find_temp_slot_from_address(%struct.rtx_def* %tmp1) #2
|
||||
%cmp59 = icmp eq %struct.temp_slot* %call, null
|
||||
br i1 %cmp59, label %cleanup, label %if.then.60
|
||||
|
||||
if.then.60: ; preds = %if.end.55
|
||||
%addr_taken = getelementptr inbounds %struct.temp_slot, %struct.temp_slot* %call, i64 0, i32 8
|
||||
store i8 1, i8* %addr_taken, align 1
|
||||
br label %cleanup
|
||||
|
||||
cleanup: ; preds = %if.then.60, %if.end.55, %lor.lhs.false, %lor.lhs.false, %lor.lhs.false, %lor.lhs.false, %lor.lhs.false, %lor.lhs.false, %lor.lhs.false, %lor.lhs.false, %if.end, %entry
|
||||
ret void
|
||||
}
|
Loading…
Reference in New Issue
Block a user