diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp
index d18ecf91ea4..83b59bae838 100644
--- a/lib/CodeGen/RegAllocFast.cpp
+++ b/lib/CodeGen/RegAllocFast.cpp
@@ -349,6 +349,11 @@ void RAFast::usePhysReg(MachineOperand &MO) {
   unsigned PhysReg = MO.getReg();
   assert(TargetRegisterInfo::isPhysicalRegister(PhysReg) &&
          "Bad usePhysReg operand");
+
+  // Ignore undef uses.
+  if (MO.isUndef())
+    return;
+
   markRegUsedInInstr(PhysReg);
   switch (PhysRegState[PhysReg]) {
   case regDisabled:
diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt
index e996f2ee838..401a8e9ce09 100644
--- a/lib/Target/X86/CMakeLists.txt
+++ b/lib/Target/X86/CMakeLists.txt
@@ -37,6 +37,7 @@ set(sources
   X86WinEHState.cpp
   X86OptimizeLEAs.cpp
   X86FixupBWInsts.cpp
+  X86WinAllocaExpander.cpp
   )
 
 add_llvm_target(X86CodeGen ${sources})
diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h
index c12554c9b92..27b428e2151 100644
--- a/lib/Target/X86/X86.h
+++ b/lib/Target/X86/X86.h
@@ -59,6 +59,9 @@ FunctionPass *createX86FixupLEAs();
 /// recalculations.
 FunctionPass *createX86OptimizeLEAs();
 
+/// Return a pass that expands WinAlloca pseudo-instructions.
+FunctionPass *createX86WinAllocaExpander();
+
 /// Return a pass that optimizes the code-size of x86 call sequences. This is
 /// done by replacing esp-relative movs with pushes.
 FunctionPass *createX86CallFrameOptimization();
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 368e265f8e5..eb9fddf8970 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -16563,14 +16563,9 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
     Result = DAG.getNode(X86ISD::SEG_ALLOCA, dl, SPTy, Chain,
                          DAG.getRegister(Vreg, SPTy));
   } else {
-    SDValue Flag;
-    const unsigned Reg = (Subtarget.isTarget64BitLP64() ? X86::RAX : X86::EAX);
-
-    Chain = DAG.getCopyToReg(Chain, dl, Reg, Size, Flag);
-    Flag = Chain.getValue(1);
     SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
-
-    Chain = DAG.getNode(X86ISD::WIN_ALLOCA, dl, NodeTys, Chain, Flag);
+    Chain = DAG.getNode(X86ISD::WIN_ALLOCA, dl, NodeTys, Chain, Size);
+    MF.getInfo<X86MachineFunctionInfo>()->setHasWinAlloca(true);
 
     const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
     unsigned SPReg = RegInfo->getStackRegister();
@@ -23228,18 +23223,6 @@ X86TargetLowering::EmitLoweredSegAlloca(MachineInstr *MI,
   return continueMBB;
 }
 
-MachineBasicBlock *
-X86TargetLowering::EmitLoweredWinAlloca(MachineInstr *MI,
-                                        MachineBasicBlock *BB) const {
-  assert(!Subtarget.isTargetMachO());
-  DebugLoc DL = MI->getDebugLoc();
-  MachineInstr *ResumeMI = Subtarget.getFrameLowering()->emitStackProbe(
-      *BB->getParent(), *BB, MI, DL, false);
-  MachineBasicBlock *ResumeBB = ResumeMI->getParent();
-  MI->eraseFromParent(); // The pseudo instruction is gone now.
-  return ResumeBB;
-}
-
 MachineBasicBlock *
 X86TargetLowering::EmitLoweredCatchRet(MachineInstr *MI,
                                        MachineBasicBlock *BB) const {
@@ -23702,8 +23685,6 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
   case X86::TLS_base_addr32:
   case X86::TLS_base_addr64:
     return EmitLoweredTLSAddr(MI, BB);
-  case X86::WIN_ALLOCA:
-    return EmitLoweredWinAlloca(MI, BB);
   case X86::CATCHRET:
     return EmitLoweredCatchRet(MI, BB);
   case X86::CATCHPAD:
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 52c4553082d..4cf35d2436b 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -1162,9 +1162,6 @@ namespace llvm {
     MachineBasicBlock *EmitLoweredAtomicFP(MachineInstr *I,
                                            MachineBasicBlock *BB) const;
 
-    MachineBasicBlock *EmitLoweredWinAlloca(MachineInstr *MI,
-                                            MachineBasicBlock *BB) const;
-
     MachineBasicBlock *EmitLoweredCatchRet(MachineInstr *MI,
                                            MachineBasicBlock *BB) const;
 
diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td
index 5efa7c74fab..66326eaf50b 100644
--- a/lib/Target/X86/X86InstrCompiler.td
+++ b/lib/Target/X86/X86InstrCompiler.td
@@ -99,18 +99,6 @@ def VAARG_64 : I<0, Pseudo,
                   (X86vaarg64 addr:$ap, imm:$size, imm:$mode, imm:$align)),
                   (implicit EFLAGS)]>;
 
-// Dynamic stack allocation yields a _chkstk or _alloca call for all Windows
-// targets. These calls are needed to probe the stack when allocating more than
-// 4k bytes in one go. Touching the stack at 4K increments is necessary to
-// ensure that the guard pages used by the OS virtual memory manager are
-// allocated in correct sequence.
-// The main point of having separate instruction are extra unmodelled effects
-// (compared to ordinary calls) like stack pointer change.
-
-let Defs = [EAX, ESP, EFLAGS], Uses = [ESP] in
-  def WIN_ALLOCA : I<0, Pseudo, (outs), (ins),
-                     "# dynamic stack allocation",
-                     [(X86WinAlloca)]>;
 
 // When using segmented stacks these are lowered into instructions which first
 // check if the current stacklet has enough free memory. If it does, memory is
@@ -132,6 +120,27 @@ def SEG_ALLOCA_64 : I<0, Pseudo, (outs GR64:$dst), (ins GR64:$size),
                     Requires<[In64BitMode]>;
 }
 
+// Dynamic stack allocation yields a _chkstk or _alloca call for all Windows
+// targets. These calls are needed to probe the stack when allocating more than
+// 4k bytes in one go. Touching the stack at 4K increments is necessary to
+// ensure that the guard pages used by the OS virtual memory manager are
+// allocated in correct sequence.
+// The main point of having a separate instruction is the extra unmodelled
+// effects (compared to ordinary calls), like the stack pointer change.
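+// For example, a single 8192-byte allocation could otherwise step over an
+// entire 4096-byte guard page without touching it, so it has to be lowered
+// to a probing sequence rather than a plain stack pointer adjustment.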
+
+let Defs = [EAX, ESP, EFLAGS], Uses = [ESP] in
+def WIN_ALLOCA_32 : I<0, Pseudo, (outs), (ins GR32:$size),
+                      "# dynamic stack allocation",
+                      [(X86WinAlloca GR32:$size)]>,
+                      Requires<[NotLP64]>;
+
+let Defs = [RAX, RSP, EFLAGS], Uses = [RSP] in
+def WIN_ALLOCA_64 : I<0, Pseudo, (outs), (ins GR64:$size),
+                      "# dynamic stack allocation",
+                      [(X86WinAlloca GR64:$size)]>,
+                      Requires<[In64BitMode]>;
+
+
 //===----------------------------------------------------------------------===//
 // EH Pseudo Instructions
 //
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index dcd3f5395ab..18c74555a93 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -112,6 +112,8 @@ def SDT_X86TLSBASEADDR : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
 
 def SDT_X86TLSCALL : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
 
+def SDT_X86WIN_ALLOCA : SDTypeProfile<0, 1, [SDTCisVT<0, iPTR>]>;
+
 def SDT_X86SEG_ALLOCA : SDTypeProfile<1, 1, [SDTCisVT<0, iPTR>, SDTCisVT<1, iPTR>]>;
 
 def SDT_X86EHRET : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
@@ -273,8 +275,8 @@ def X86bextr : SDNode<"X86ISD::BEXTR", SDTIntBinOp>;
 
 def X86mul_imm : SDNode<"X86ISD::MUL_IMM", SDTIntBinOp>;
 
-def X86WinAlloca : SDNode<"X86ISD::WIN_ALLOCA", SDTX86Void,
-                          [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>;
+def X86WinAlloca : SDNode<"X86ISD::WIN_ALLOCA", SDT_X86WIN_ALLOCA,
+                          [SDNPHasChain, SDNPOutGlue]>;
 
 def X86SegAlloca : SDNode<"X86ISD::SEG_ALLOCA", SDT_X86SEG_ALLOCA,
                           [SDNPHasChain]>;
diff --git a/lib/Target/X86/X86MachineFunctionInfo.h b/lib/Target/X86/X86MachineFunctionInfo.h
index 4632adaf1e7..d517d82537a 100644
--- a/lib/Target/X86/X86MachineFunctionInfo.h
+++ b/lib/Target/X86/X86MachineFunctionInfo.h
@@ -98,6 +98,9 @@ class X86MachineFunctionInfo : public MachineFunctionInfo {
   /// True if this function uses the red zone.
   bool UsesRedZone = false;
 
+  /// True if this function has WIN_ALLOCA instructions.
+  bool HasWinAlloca = false;
+
 private:
   /// ForwardedMustTailRegParms - A list of virtual and physical registers
   /// that must be forwarded to every musttail call.
@@ -172,6 +175,9 @@ public:
 
   bool getUsesRedZone() const { return UsesRedZone; }
   void setUsesRedZone(bool V) { UsesRedZone = V; }
+
+  bool hasWinAlloca() const { return HasWinAlloca; }
+  void setHasWinAlloca(bool v) { HasWinAlloca = v; }
 };
 
 } // End llvm namespace
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index cde3197c58c..1fe98a66c7c 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -266,6 +266,7 @@ void X86PassConfig::addPreRegAlloc() {
     addPass(createX86OptimizeLEAs());
 
   addPass(createX86CallFrameOptimization());
+  addPass(createX86WinAllocaExpander());
 }
 
 void X86PassConfig::addPostRegAlloc() {
diff --git a/lib/Target/X86/X86WinAllocaExpander.cpp b/lib/Target/X86/X86WinAllocaExpander.cpp
new file mode 100644
index 00000000000..cc82074e685
--- /dev/null
+++ b/lib/Target/X86/X86WinAllocaExpander.cpp
@@ -0,0 +1,294 @@
+//===----- X86WinAllocaExpander.cpp - Expand WinAlloca pseudo instruction -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a pass that expands WinAlloca pseudo-instructions.
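+// The pass replaces each WinAlloca with either a plain stack pointer
+// adjustment, a "touch" of the stack followed by an adjustment, or a call
+// to the stack probe function (e.g. _chkstk).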
+//
+// It performs a conservative analysis to determine whether each allocation
+// falls within a region of the stack that is safe to use, or whether stack
+// probes must be emitted.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86.h"
+#include "X86InstrBuilder.h"
+#include "X86InstrInfo.h"
+#include "X86MachineFunctionInfo.h"
+#include "X86Subtarget.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+using namespace llvm;
+
+namespace {
+
+class X86WinAllocaExpander : public MachineFunctionPass {
+public:
+  X86WinAllocaExpander() : MachineFunctionPass(ID) {}
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+private:
+  /// Strategies for lowering a WinAlloca.
+  enum Lowering { TouchAndSub, Sub, Probe };
+
+  /// Deterministic-order map from WinAlloca instruction to desired lowering.
+  typedef MapVector<MachineInstr*, Lowering> LoweringMap;
+
+  /// Compute which lowering to use for each WinAlloca instruction.
+  void computeLowerings(MachineFunction &MF, LoweringMap& Lowerings);
+
+  /// Get the appropriate lowering based on current offset and amount.
+  Lowering getLowering(int64_t CurrentOffset, int64_t AllocaAmount);
+
+  /// Lower a WinAlloca instruction.
+  void lower(MachineInstr* MI, Lowering L);
+
+  MachineRegisterInfo *MRI;
+  const X86Subtarget *STI;
+  const TargetInstrInfo *TII;
+  const X86RegisterInfo *TRI;
+  unsigned StackPtr;
+  unsigned SlotSize;
+  int64_t StackProbeSize;
+
+  const char *getPassName() const override { return "X86 WinAlloca Expander"; }
+  static char ID;
+};
+
+char X86WinAllocaExpander::ID = 0;
+
+} // end anonymous namespace
+
+FunctionPass *llvm::createX86WinAllocaExpander() {
+  return new X86WinAllocaExpander();
+}
+
+/// Return the allocation amount for a WinAlloca instruction, or -1 if unknown.
+static int64_t getWinAllocaAmount(MachineInstr *MI, MachineRegisterInfo *MRI) {
+  assert(MI->getOpcode() == X86::WIN_ALLOCA_32 ||
+         MI->getOpcode() == X86::WIN_ALLOCA_64);
+  assert(MI->getOperand(0).isReg());
+
+  unsigned AmountReg = MI->getOperand(0).getReg();
+  MachineInstr *Def = MRI->getUniqueVRegDef(AmountReg);
+
+  // Look through copies.
+  while (Def && Def->isCopy() && Def->getOperand(1).isReg())
+    Def = MRI->getUniqueVRegDef(Def->getOperand(1).getReg());
+
+  if (!Def ||
+      (Def->getOpcode() != X86::MOV32ri && Def->getOpcode() != X86::MOV64ri) ||
+      !Def->getOperand(1).isImm())
+    return -1;
+
+  return Def->getOperand(1).getImm();
+}
+
+X86WinAllocaExpander::Lowering
+X86WinAllocaExpander::getLowering(int64_t CurrentOffset,
+                                  int64_t AllocaAmount) {
+  // For a non-constant amount or a large amount, we have to probe.
+  if (AllocaAmount < 0 || AllocaAmount > StackProbeSize)
+    return Probe;
+
+  // If it fits within the safe region of the stack, just subtract.
+  if (CurrentOffset + AllocaAmount <= StackProbeSize)
+    return Sub;
+
+  // Otherwise, touch the current tip of the stack, then subtract.
+  return TouchAndSub;
+}
+
+static bool isPushPop(const MachineInstr &MI) {
+  switch (MI.getOpcode()) {
+  case X86::PUSH32i8:
+  case X86::PUSH32r:
+  case X86::PUSH32rmm:
+  case X86::PUSH32rmr:
+  case X86::PUSHi32:
+  case X86::PUSH64i8:
+  case X86::PUSH64r:
+  case X86::PUSH64rmm:
+  case X86::PUSH64rmr:
+  case X86::PUSH64i32:
+  case X86::POP32r:
+  case X86::POP64r:
+    return true;
+  default:
+    return false;
+  }
+}
+
+void X86WinAllocaExpander::computeLowerings(MachineFunction &MF,
+                                            LoweringMap &Lowerings) {
+  // Do a one-pass reverse post-order walk of the CFG to conservatively
+  // estimate the offset between the stack pointer and the lowest touched part
+  // of the stack, and use that to decide how to lower each WinAlloca
+  // instruction.
+
+  // Initialize OutOffset[B], the stack offset at exit from B, to something big.
+  DenseMap<MachineBasicBlock *, int64_t> OutOffset;
+  for (MachineBasicBlock &MBB : MF)
+    OutOffset[&MBB] = INT32_MAX;
+
+  // Note: we don't know the offset at the start of the entry block since the
+  // prologue hasn't been inserted yet, and how much that will adjust the stack
+  // pointer depends on register spills, which have not been computed yet.
+
+  // Compute the reverse post-order.
+  ReversePostOrderTraversal<MachineFunction *> RPO(&MF);
+
+  for (MachineBasicBlock *MBB : RPO) {
+    int64_t Offset = -1;
+    for (MachineBasicBlock *Pred : MBB->predecessors())
+      Offset = std::max(Offset, OutOffset[Pred]);
+    if (Offset == -1) Offset = INT32_MAX;
+
+    for (MachineInstr &MI : *MBB) {
+      if (MI.getOpcode() == X86::WIN_ALLOCA_32 ||
+          MI.getOpcode() == X86::WIN_ALLOCA_64) {
+        // A WinAlloca moves StackPtr, and potentially touches it.
+        int64_t Amount = getWinAllocaAmount(&MI, MRI);
+        Lowering L = getLowering(Offset, Amount);
+        Lowerings[&MI] = L;
+        switch (L) {
+        case Sub:
+          Offset += Amount;
+          break;
+        case TouchAndSub:
+          Offset = Amount;
+          break;
+        case Probe:
+          Offset = 0;
+          break;
+        }
+      } else if (MI.isCall() || isPushPop(MI)) {
+        // Calls, pushes and pops touch the tip of the stack.
+        Offset = 0;
+      } else if (MI.getOpcode() == X86::ADJCALLSTACKUP32 ||
+                 MI.getOpcode() == X86::ADJCALLSTACKUP64) {
+        Offset -= MI.getOperand(0).getImm();
+      } else if (MI.getOpcode() == X86::ADJCALLSTACKDOWN32 ||
+                 MI.getOpcode() == X86::ADJCALLSTACKDOWN64) {
+        Offset += MI.getOperand(0).getImm();
+      } else if (MI.modifiesRegister(StackPtr, TRI)) {
+        // Any other modification of SP means we've lost track of it.
+        Offset = INT32_MAX;
+      }
+    }
+
+    OutOffset[MBB] = Offset;
+  }
+}
+
+static unsigned getSubOpcode(bool Is64Bit, int64_t Amount) {
+  if (Is64Bit)
+    return isInt<8>(Amount) ? X86::SUB64ri8 : X86::SUB64ri32;
+  return isInt<8>(Amount) ? X86::SUB32ri8 : X86::SUB32ri;
+}
+
+void X86WinAllocaExpander::lower(MachineInstr* MI, Lowering L) {
+  DebugLoc DL = MI->getDebugLoc();
+  MachineBasicBlock *MBB = MI->getParent();
+  MachineBasicBlock::iterator I = *MI;
+
+  int64_t Amount = getWinAllocaAmount(MI, MRI);
+  if (Amount == 0) {
+    MI->eraseFromParent();
+    return;
+  }
+
+  bool Is64Bit = STI->is64Bit();
+  assert(SlotSize == 4 || SlotSize == 8);
+  unsigned RegA = (SlotSize == 8) ? X86::RAX : X86::EAX;
+
+  switch (L) {
+  case TouchAndSub:
+    assert(Amount >= SlotSize);
+
+    // Use a push to touch the top of the stack.
+    BuildMI(*MBB, I, DL, TII->get(Is64Bit ? X86::PUSH64r : X86::PUSH32r))
+      .addReg(RegA, RegState::Undef);
+    Amount -= SlotSize;
+    if (!Amount)
+      break;
+
+    // Fall through to make any remaining adjustment.
+  case Sub:
+    assert(Amount > 0);
+    if (Amount == SlotSize) {
+      // Use push to save size.
+      BuildMI(*MBB, I, DL, TII->get(Is64Bit ? X86::PUSH64r : X86::PUSH32r))
+        .addReg(RegA, RegState::Undef);
+    } else {
+      // Sub.
+      BuildMI(*MBB, I, DL, TII->get(getSubOpcode(Is64Bit, Amount)), StackPtr)
+        .addReg(StackPtr)
+        .addImm(Amount);
+    }
+    break;
+  case Probe:
+    // The probe lowering expects the amount in RAX/EAX.
+    BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::COPY), RegA)
+      .addReg(MI->getOperand(0).getReg());
+
+    // Do the probe.
+    STI->getFrameLowering()->emitStackProbe(*MBB->getParent(), *MBB, MI, DL,
+                                            /*InPrologue=*/false);
+    break;
+  }
+
+  unsigned AmountReg = MI->getOperand(0).getReg();
+  MI->eraseFromParent();
+
+  // Delete the definition of AmountReg, possibly walking a chain of copies.
+  for (;;) {
+    if (!MRI->use_empty(AmountReg))
+      break;
+    MachineInstr *AmountDef = MRI->getUniqueVRegDef(AmountReg);
+    if (!AmountDef)
+      break;
+    if (AmountDef->isCopy() && AmountDef->getOperand(1).isReg())
+      AmountReg = AmountDef->getOperand(1).getReg();
+    AmountDef->eraseFromParent();
+    break;
+  }
+}
+
+bool X86WinAllocaExpander::runOnMachineFunction(MachineFunction &MF) {
+  if (!MF.getInfo<X86MachineFunctionInfo>()->hasWinAlloca())
+    return false;
+
+  MRI = &MF.getRegInfo();
+  STI = &MF.getSubtarget<X86Subtarget>();
+  TII = STI->getInstrInfo();
+  TRI = STI->getRegisterInfo();
+  StackPtr = TRI->getStackRegister();
+  SlotSize = TRI->getSlotSize();
+
+  StackProbeSize = 4096;
+  if (MF.getFunction()->hasFnAttribute("stack-probe-size")) {
+    MF.getFunction()
+        ->getFnAttribute("stack-probe-size")
+        .getValueAsString()
+        .getAsInteger(0, StackProbeSize);
+  }
+
+  LoweringMap Lowerings;
+  computeLowerings(MF, Lowerings);
+  for (auto &P : Lowerings)
+    lower(P.first, P.second);
+
+  return true;
+}
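For reference, the following is a minimal standalone sketch (not part of the patch) of the decision rule that getLowering() implements, using the default 4096-byte probe interval. The helper name `classify` and the sample offsets are illustrative only.

    // Standalone illustration of the WinAlloca lowering decision rule.
    // Mirrors X86WinAllocaExpander::getLowering; names and values are examples.
    #include <cstdint>
    #include <cstdio>

    enum Lowering { TouchAndSub, Sub, Probe };

    // Decide how to lower an allocation of `amount` bytes when the lowest
    // already-touched stack address is `offset` bytes below the stack pointer.
    static Lowering classify(int64_t offset, int64_t amount, int64_t probeSize) {
      if (amount < 0 || amount > probeSize)
        return Probe;       // Unknown or large amount: must call the stack probe.
      if (offset + amount <= probeSize)
        return Sub;         // Stays within the already-safe region.
      return TouchAndSub;   // Touch the tip of the stack, then subtract.
    }

    int main() {
      const int64_t probeSize = 4096;
      struct { int64_t offset, amount; } cases[] = {
          {0, 1024},        // -> Sub: fits in the current safe region.
          {3500, 1024},     // -> TouchAndSub: would cross into an untouched page.
          {0, 10000},       // -> Probe: larger than the probe interval.
          {INT32_MAX, 1024} // -> TouchAndSub: offset unknown, assume the worst.
      };
      const char *names[] = {"TouchAndSub", "Sub", "Probe"};
      for (auto &c : cases)
        std::printf("offset=%lld amount=%lld -> %s\n", (long long)c.offset,
                    (long long)c.amount,
                    names[classify(c.offset, c.amount, probeSize)]);
    }

In lower() above, Sub becomes a bare `sub` of the stack pointer (or a single push when the amount equals the slot size), TouchAndSub becomes a push that touches the stack tip followed by a `sub` of the remainder, and Probe copies the amount into EAX/RAX and calls the stack probe.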
diff --git a/test/CodeGen/X86/cleanuppad-inalloca.ll b/test/CodeGen/X86/cleanuppad-inalloca.ll
index 2e34ada52e6..c0660fee2f1 100644
--- a/test/CodeGen/X86/cleanuppad-inalloca.ll
+++ b/test/CodeGen/X86/cleanuppad-inalloca.ll
@@ -38,8 +38,8 @@ ehcleanup:                                        ; preds = %entry
 ; CHECK: pushl %ebp
 ; CHECK: movl %esp, %ebp
 ; CHECK: subl ${{[0-9]+}}, %esp
-; CHECK: movl $8, %eax
-; CHECK: calll __chkstk
+; CHECK: pushl %eax
+; CHECK: pushl %eax
 ; CHECK: calll "??0A@@QAE@XZ"
 ; CHECK: calll "??0A@@QAE@XZ"
 ; CHECK: calll _takes_two
diff --git a/test/CodeGen/X86/dynamic-alloca-in-entry.ll b/test/CodeGen/X86/dynamic-alloca-in-entry.ll
index 7ed471c2f50..2b5721d7fcf 100644
--- a/test/CodeGen/X86/dynamic-alloca-in-entry.ll
+++ b/test/CodeGen/X86/dynamic-alloca-in-entry.ll
@@ -15,5 +15,5 @@ define void @bar() {
   ret void
 }
 ; CHECK-LABEL: _bar:
-; CHECK: calll __chkstk
+; CHECK: pushl %eax
 ; CHECK: retl
diff --git a/test/CodeGen/X86/inalloca-ctor.ll b/test/CodeGen/X86/inalloca-ctor.ll
index eba4e72f933..f13d537d90b 100644
--- a/test/CodeGen/X86/inalloca-ctor.ll
+++ b/test/CodeGen/X86/inalloca-ctor.ll
@@ -12,8 +12,8 @@ define void @g() {
 entry:
   %args = alloca inalloca %frame
   %c = getelementptr %frame, %frame* %args, i32 0, i32 2
-; CHECK: movl $20, %eax
-; CHECK: calll __chkstk
+; CHECK: pushl %eax
+; CHECK: subl $16, %esp
 ; CHECK: movl %esp,
   call void @Foo_ctor(%Foo* %c)
 ; CHECK: leal 12(%{{.*}}),
diff --git a/test/CodeGen/X86/inalloca-invoke.ll b/test/CodeGen/X86/inalloca-invoke.ll
index 9a184e563b1..d90e5012ba4 100644
--- a/test/CodeGen/X86/inalloca-invoke.ll
+++ b/test/CodeGen/X86/inalloca-invoke.ll
@@ -21,7 +21,8 @@ blah:
   %beg = getelementptr %frame.reverse, %frame.reverse* %rev_args, i32 0, i32 0
   %end = getelementptr %frame.reverse, %frame.reverse* %rev_args, i32 0, i32 1
 
-; CHECK: calll __chkstk
+; CHECK: pushl %eax
+; CHECK: subl $20, %esp
 ; CHECK: movl %esp, %[[beg:[^ ]*]]
 ; CHECK: leal 12(%[[beg]]), %[[end:[^ ]*]]
 
diff --git a/test/CodeGen/X86/inalloca-stdcall.ll b/test/CodeGen/X86/inalloca-stdcall.ll
index 4f7e4092a99..69d94d8bfa7 100644
--- a/test/CodeGen/X86/inalloca-stdcall.ll
+++ b/test/CodeGen/X86/inalloca-stdcall.ll
@@ -8,8 +8,8 @@ declare x86_stdcallcc void @i(i32 %a)
 define void @g() {
 ; CHECK-LABEL: _g:
   %b = alloca inalloca %Foo
-; CHECK: movl $8, %eax
-; CHECK: calll __chkstk
+; CHECK: pushl %eax
+; CHECK: pushl %eax
   %f1 = getelementptr %Foo, %Foo* %b, i32 0, i32 0
   %f2 = getelementptr %Foo, %Foo* %b, i32 0, i32 1
   store i32 13, i32* %f1
diff --git a/test/CodeGen/X86/inalloca.ll b/test/CodeGen/X86/inalloca.ll
index e523c945a69..134de2f58dd 100644
--- a/test/CodeGen/X86/inalloca.ll
+++ b/test/CodeGen/X86/inalloca.ll
@@ -8,8 +8,8 @@ define void @a() {
 ; CHECK-LABEL: _a:
 entry:
   %b = alloca inalloca %Foo
-; CHECK: movl $8, %eax
-; CHECK: calll __chkstk
+; CHECK: pushl %eax
+; CHECK: pushl %eax
   %f1 = getelementptr %Foo, %Foo* %b, i32 0, i32 0
   %f2 = getelementptr %Foo, %Foo* %b, i32 0, i32 1
   store i32 13, i32* %f1
@@ -28,8 +28,8 @@ define void @b() {
 ; CHECK-LABEL: _b:
 entry:
   %b = alloca inalloca %Foo
-; CHECK: movl $8, %eax
-; CHECK: calll __chkstk
+; CHECK: pushl %eax
+; CHECK: pushl %eax
   %f1 = getelementptr %Foo, %Foo* %b, i32 0, i32 0
   %f2 = getelementptr %Foo, %Foo* %b, i32 0, i32 1
   store i32 13, i32* %f1
@@ -49,8 +49,8 @@ define void @c() {
 ; CHECK-LABEL: _c:
 entry:
   %b = alloca inalloca %Foo
-; CHECK: movl $8, %eax
-; CHECK: calll __chkstk
+; CHECK: pushl %eax
+; CHECK: pushl %eax
   %f1 = getelementptr %Foo, %Foo* %b, i32 0, i32 0
   %f2 = getelementptr %Foo, %Foo* %b, i32 0, i32 1
   store i32 13, i32* %f1
diff --git a/test/CodeGen/X86/shrink-wrap-chkstk.ll b/test/CodeGen/X86/shrink-wrap-chkstk.ll
index 94501b5764e..099ef137d8d 100644
--- a/test/CodeGen/X86/shrink-wrap-chkstk.ll
+++ b/test/CodeGen/X86/shrink-wrap-chkstk.ll
@@ -7,7 +7,7 @@
 target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
 target triple = "i686-pc-windows-msvc18.0.0"
 
-%struct.S = type { [12 x i8] }
+%struct.S = type { [8192 x i8] }
 
 define x86_thiscallcc void @call_inalloca(i1 %x) {
 entry:
@@ -29,7 +29,7 @@ bb2:
 ; CHECK-LABEL: _call_inalloca: # @call_inalloca
 ; CHECK: pushl %ebp
 ; CHECK: movl %esp, %ebp
-; CHECK: movl $12, %eax
+; CHECK: movl $8192, %eax
 ; CHECK: calll __chkstk
 ; CHECK: calll _inalloca_params
 ; CHECK: movl %ebp, %esp
diff --git a/test/CodeGen/X86/win-alloca-expander.ll b/test/CodeGen/X86/win-alloca-expander.ll
new file mode 100644
index 00000000000..45ca3b214ab
--- /dev/null
+++ b/test/CodeGen/X86/win-alloca-expander.ll
@@ -0,0 +1,154 @@
+; RUN: llc < %s -mtriple=i686-pc-win32 | FileCheck %s
+; RUN: llc < %s -mtriple=i686-pc-win32 -O0
+
+%struct.S = type { [1024 x i8] }
+%struct.T = type { [3000 x i8] }
+%struct.U = type { [10000 x i8] }
+
+define void @basics() {
+; CHECK-LABEL: basics:
+entry:
+  br label %bb1
+
+; Allocation move sizes should have been removed.
+; CHECK-NOT: movl $1024
+; CHECK-NOT: movl $3000
+
+bb1:
+  %p0 = alloca %struct.S
+; The allocation is small enough not to require stack probing, but the %esp
+; offset after the prologue is not known, so the stack must be touched before
+; the pointer is adjusted.
+; CHECK: pushl %eax
+; CHECK: subl $1020, %esp
+
+  %saved_stack = tail call i8* @llvm.stacksave()
+
+  %p1 = alloca %struct.S
+; We know the %esp offset from above, so there is no need to touch the stack
+; before adjusting it.
+; CHECK: subl $1024, %esp
+
+  %p2 = alloca %struct.T
+; The offset is now 2048 bytes, so allocating a T must touch the stack again.
+; CHECK: pushl %eax
+; CHECK: subl $2996, %esp
+
+  call void @f(%struct.S* %p0)
+; CHECK: calll
+
+  %p3 = alloca %struct.T
+; The call above touched the stack, so there is room for a T object.
+; CHECK: subl $3000, %esp
+
+  %p4 = alloca %struct.U
+; The U object is large enough to require stack probing.
+; CHECK: movl $10000, %eax
+; CHECK: calll __chkstk
+
+  %p5 = alloca %struct.T
+; The stack probing above touched the tip of the stack, so there's room for a T.
+; CHECK: subl $3000, %esp
+
+  call void @llvm.stackrestore(i8* %saved_stack)
+  %p6 = alloca %struct.S
+; The stack restore means we lose track of the stack pointer and must probe.
+; CHECK: pushl %eax
+; CHECK: subl $1020, %esp
+
+; Use the pointers so they're not optimized away.
+  call void @f(%struct.S* %p1)
+  call void @g(%struct.T* %p2)
+  call void @g(%struct.T* %p3)
+  call void @h(%struct.U* %p4)
+  call void @g(%struct.T* %p5)
+  ret void
+}
+
+define void @loop() {
+; CHECK-LABEL: loop:
+entry:
+  br label %bb1
+
+bb1:
+  %p1 = alloca %struct.S
+; The entry offset is unknown; touch-and-sub.
+; CHECK: pushl %eax
+; CHECK: subl $1020, %esp
+  br label %loop1
+
+loop1:
+  %i1 = phi i32 [ 10, %bb1 ], [ %dec1, %loop1 ]
+  %p2 = alloca %struct.S
+; We know the incoming offset from bb1, but from the back-edge, we assume the
+; worst, and therefore touch-and-sub to allocate.
+; CHECK: pushl %eax
+; CHECK: subl $1020, %esp
+  %dec1 = sub i32 %i1, 1
+  %cmp1 = icmp sgt i32 %i1, 0
+  br i1 %cmp1, label %loop1, label %end
+; CHECK: decl
+; CHECK: jg
+
+end:
+  call void @f(%struct.S* %p1)
+  call void @f(%struct.S* %p2)
+  ret void
+}
+
+define void @probe_size_attribute() "stack-probe-size"="512" {
+; CHECK-LABEL: probe_size_attribute:
+entry:
+  br label %bb1
+
+bb1:
+  %p0 = alloca %struct.S
+; The allocation would be small enough not to require probing, if it wasn't
+; for the stack-probe-size attribute.
+; CHECK: movl $1024, %eax
+; CHECK: calll __chkstk
+  call void @f(%struct.S* %p0)
+  ret void
+}
+
+define void @cfg(i1 %x, i1 %y) {
+; Test that the blocks are analyzed in the correct order.
+; CHECK-LABEL: cfg:
+entry:
+  br i1 %x, label %bb1, label %bb2
+
+bb1:
+  %p1 = alloca %struct.S
+; CHECK: pushl %eax
+; CHECK: subl $1020, %esp
+  br label %bb3
+bb2:
+  %p2 = alloca %struct.T
+; CHECK: pushl %eax
+; CHECK: subl $2996, %esp
+  br label %bb3
+
+bb3:
+  br i1 %y, label %bb4, label %bb5
+
+bb4:
+  %p4 = alloca %struct.S
+; CHECK: subl $1024, %esp
+  call void @f(%struct.S* %p4)
+  ret void
+
+bb5:
+  %p5 = alloca %struct.T
+; CHECK: pushl %eax
+; CHECK: subl $2996, %esp
+  call void @g(%struct.T* %p5)
+  ret void
+}
+
+
+declare void @f(%struct.S*)
+declare void @g(%struct.T*)
+declare void @h(%struct.U*)
+
+declare i8* @llvm.stacksave()
+declare void @llvm.stackrestore(i8*)
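The probe_size_attribute test above relies on the pass reading the function's "stack-probe-size" attribute (see runOnMachineFunction in X86WinAllocaExpander.cpp). The sketch below is a standalone illustration of that effect, assuming the pass's default of 4096 bytes; `parseProbeSize` is a simplified stand-in for the StringRef-based parsing in the pass, not an LLVM API.

    // Illustration: a "stack-probe-size"="512" attribute makes a 1024-byte
    // alloca exceed the probe interval, so it must be lowered to __chkstk.
    #include <cstdint>
    #include <cstdio>
    #include <cstdlib>
    #include <string>

    static int64_t parseProbeSize(const std::string &attrValue,
                                  int64_t defaultSize = 4096) {
      if (attrValue.empty())
        return defaultSize;               // Attribute absent: keep the default.
      return std::strtoll(attrValue.c_str(), nullptr, 0);
    }

    int main() {
      const int64_t allocaBytes = 1024;
      for (const char *attr : {"", "512"}) {
        int64_t probeSize = parseProbeSize(attr);
        bool needsProbe = allocaBytes > probeSize; // Same test getLowering uses.
        std::printf("probe size %lld: 1024-byte alloca %s\n",
                    (long long)probeSize,
                    needsProbe ? "calls __chkstk" : "is a push/sub sequence");
      }
    }

With the default 4096-byte interval the 1024-byte allocation is handled by a push/sub sequence, while the 512-byte attribute forces the probe, which is exactly what the CHECK lines in @probe_size_attribute expect.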