[X86] Support cleaning more than 2**16 bytes of stack

The x86 ret instruction has a 16 bit immediate indicating how many bytes
to pop off of the stack beyond the return address.

There is a problem when extremely large structs are passed by value: we
might not be able to fit the number of bytes to pop into the return
instruction.

To fix this, expand RET_FLAG a little later and use a special sequence
to clean the stack:

pop  %ecx     ; return address is now in %ecx
add  $n, %esp ; clean the stack
push %ecx     ; bring the return address back on the stack
ret           ; pop the return address and jmp to it's value

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@262755 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
David Majnemer 2016-03-04 22:56:17 +00:00
parent 2c1ecb8c48
commit 029ad727ae
7 changed files with 57 additions and 8 deletions

View File

@ -152,6 +152,31 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
MBB.erase(MBBI);
return true;
}
case X86::RET: {
// Adjust stack to erase error code
int64_t StackAdj = MBBI->getOperand(0).getImm();
MachineInstrBuilder MIB;
if (StackAdj == 0) {
MIB = BuildMI(MBB, MBBI, DL,
TII->get(STI->is64Bit() ? X86::RETQ : X86::RETL));
} else if (isUInt<16>(StackAdj)) {
MIB = BuildMI(MBB, MBBI, DL,
TII->get(STI->is64Bit() ? X86::RETIQ : X86::RETIL))
.addImm(StackAdj);
} else {
assert(!Is64Bit && "shouldn't need to do this for x86_64 targets!");
// A ret can only handle immediates as big as 2**16-1. If we need to pop
// off bytes before the return address, we must do it manually.
BuildMI(MBB, MBBI, DL, X86::POP32r).addReg(X86::ECX, RegState::Define);
X86FL->emitSPUpdate(MBB, MBBI, StackAdj, /*InEpilogue=*/true);
BuildMI(MBB, MBBI, DL, X86::PUSH32r).addReg(X86::ECX);
MIB = BuildMI(MBB, MBBI, DL, X86::RETL);
}
for (unsigned I = 1, E = MBBI->getNumOperands(); I != E; ++I)
MIB.addOperand(MBBI->getOperand(I));
MBB.erase(MBBI);
return true;
}
case X86::EH_RESTORE: {
// Restore ESP and EBP, and optionally ESI if required.
bool IsSEH = isAsynchronousEHPersonality(classifyEHPersonality(

View File

@ -1509,6 +1509,7 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &Inst) {
return;
}
case X86::RET:
case X86::RETQ:
case X86::RETL:
case X86::RETIL:

View File

@ -159,6 +159,7 @@ static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB,
unsigned Opc = MBBI->getOpcode();
switch (Opc) {
default: return 0;
case X86::RET:
case X86::RETL:
case X86::RETQ:
case X86::RETIL:

View File

@ -2211,7 +2211,7 @@ X86TargetLowering::LowerReturn(SDValue Chain,
RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
// Operand #1 = Bytes To Pop
RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(), dl,
MVT::i16));
MVT::i32));
// Copy the result values into the output registers.
for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {

View File

@ -22,21 +22,21 @@
let isTerminator = 1, isReturn = 1, isBarrier = 1,
hasCtrlDep = 1, FPForm = SpecialFP, SchedRW = [WriteJumpLd] in {
def RETL : I <0xC3, RawFrm, (outs), (ins variable_ops),
"ret{l}", [(X86retflag 0)], IIC_RET>, OpSize32,
"ret{l}", [], IIC_RET>, OpSize32,
Requires<[Not64BitMode]>;
def RETQ : I <0xC3, RawFrm, (outs), (ins variable_ops),
"ret{q}", [(X86retflag 0)], IIC_RET>, OpSize32,
"ret{q}", [], IIC_RET>, OpSize32,
Requires<[In64BitMode]>;
def RETW : I <0xC3, RawFrm, (outs), (ins),
"ret{w}",
[], IIC_RET>, OpSize16;
def RETIL : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt, variable_ops),
"ret{l}\t$amt",
[(X86retflag timm:$amt)], IIC_RET_IMM>, OpSize32,
[], IIC_RET_IMM>, OpSize32,
Requires<[Not64BitMode]>;
def RETIQ : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt, variable_ops),
"ret{q}\t$amt",
[(X86retflag timm:$amt)], IIC_RET_IMM>, OpSize32,
[], IIC_RET_IMM>, OpSize32,
Requires<[In64BitMode]>;
def RETIW : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt),
"ret{w}\t$amt",
@ -64,8 +64,8 @@ let isTerminator = 1, isReturn = 1, isBarrier = 1,
def IRET64 : RI <0xcf, RawFrm, (outs), (ins), "iretq", [],
IIC_IRET>, Requires<[In64BitMode]>;
let isCodeGenOnly = 1 in
def IRET : PseudoI<(outs), (ins i16imm:$adj), [(X86iret timm:$adj)]>;
def IRET : PseudoI<(outs), (ins i32imm:$adj), [(X86iret timm:$adj)]>;
def RET : PseudoI<(outs), (ins i32imm:$adj, variable_ops), [(X86retflag timm:$adj)]>;
}
// Unconditional branches.

View File

@ -76,7 +76,7 @@ def SDTLockBinaryArithWithFlags : SDTypeProfile<1, 2, [SDTCisVT<0, i32>,
SDTCisPtrTy<1>,
SDTCisInt<2>]>;
def SDTX86Ret : SDTypeProfile<0, -1, [SDTCisVT<0, i16>]>;
def SDTX86Ret : SDTypeProfile<0, -1, [SDTCisVT<0, i32>]>;
def SDT_X86CallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>]>;
def SDT_X86CallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i32>,

View File

@ -0,0 +1,22 @@
; RUN: llc < %s | FileCheck %s
target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
target triple = "i386-pc-windows-msvc"
define x86_fastcallcc i32 @test1(i32 inreg %V, [65533 x i8]* byval %p_arg) {
ret i32 %V
}
; CHECK-LABEL: @test1@65540:
; CHECK: movl %ecx, %eax
; CHECK-NEXT: popl %ecx
; CHECK-NEXT: addl $65536, %esp
; CHECK-NEXT: pushl %ecx
; CHECK-NEXT: retl
define x86_stdcallcc void @test2([65533 x i8]* byval %p_arg) {
ret void
}
; CHECK-LABEL: _test2@65536:
; CHECK: popl %ecx
; CHECK-NEXT: addl $65536, %esp
; CHECK-NEXT: pushl %ecx
; CHECK-NEXT: retl