Make variable argument intrinsics behave correctly in a Win64 CC function.

Summary:
This change makes the variable argument intrinsics, `llvm.va_start` and
`llvm.va_copy`, and the `va_arg` instruction behave as they do on Windows
inside a `CallingConv::X86_64_Win64` function. It's needed for a Clang patch
I have to add support for GCC's `__builtin_ms_va_list` constructs.

Reviewers: nadav, asl, eugenis

CC: llvm-commits

Differential Revision: http://llvm-reviews.chandlerc.com/D1622

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@245990 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Charles Davis 2015-08-25 23:27:41 +00:00
parent c3a33b1b54
commit 7e96f0f6ff
6 changed files with 191 additions and 58 deletions

View File

@ -901,6 +901,12 @@ public:
/// the target's desired shift amount type.
SDValue getShiftAmountOperand(EVT LHSTy, SDValue Op);
/// Expand the specified \c ISD::VAARG node as the Legalize pass would.
SDValue expandVAArg(SDNode *Node);
/// Expand the specified \c ISD::VACOPY node as the Legalize pass would.
SDValue expandVACopy(SDNode *Node);
/// *Mutate* the specified node in-place to have the
/// specified operands. If the resultant node already exists in the DAG,
/// this does not modify the specified node, instead it returns the node that

View File

@ -3099,57 +3099,13 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Results.push_back(Tmp1);
break;
}
case ISD::VAARG: {
const Value *V = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
EVT VT = Node->getValueType(0);
Tmp1 = Node->getOperand(0);
Tmp2 = Node->getOperand(1);
unsigned Align = Node->getConstantOperandVal(3);
SDValue VAListLoad =
DAG.getLoad(TLI.getPointerTy(DAG.getDataLayout()), dl, Tmp1, Tmp2,
MachinePointerInfo(V), false, false, false, 0);
SDValue VAList = VAListLoad;
if (Align > TLI.getMinStackArgumentAlignment()) {
assert(((Align & (Align-1)) == 0) && "Expected Align to be a power of 2");
VAList = DAG.getNode(ISD::ADD, dl, VAList.getValueType(), VAList,
DAG.getConstant(Align - 1, dl,
VAList.getValueType()));
VAList = DAG.getNode(ISD::AND, dl, VAList.getValueType(), VAList,
DAG.getConstant(-(int64_t)Align, dl,
VAList.getValueType()));
}
// Increment the pointer, VAList, to the next vaarg
Tmp3 = DAG.getNode(ISD::ADD, dl, VAList.getValueType(), VAList,
DAG.getConstant(DAG.getDataLayout().getTypeAllocSize(
VT.getTypeForEVT(*DAG.getContext())),
dl, VAList.getValueType()));
// Store the incremented VAList to the legalized pointer
Tmp3 = DAG.getStore(VAListLoad.getValue(1), dl, Tmp3, Tmp2,
MachinePointerInfo(V), false, false, 0);
// Load the actual argument out of the pointer VAList
Results.push_back(DAG.getLoad(VT, dl, Tmp3, VAList, MachinePointerInfo(),
false, false, false, 0));
case ISD::VAARG:
Results.push_back(DAG.expandVAArg(Node));
Results.push_back(Results[0].getValue(1));
break;
}
case ISD::VACOPY: {
// This defaults to loading a pointer from the input and storing it to the
// output, returning the chain.
const Value *VD = cast<SrcValueSDNode>(Node->getOperand(3))->getValue();
const Value *VS = cast<SrcValueSDNode>(Node->getOperand(4))->getValue();
Tmp1 = DAG.getLoad(TLI.getPointerTy(DAG.getDataLayout()), dl,
Node->getOperand(0), Node->getOperand(2),
MachinePointerInfo(VS), false, false, false, 0);
Tmp1 = DAG.getStore(Tmp1.getValue(1), dl, Tmp1, Node->getOperand(1),
MachinePointerInfo(VD), false, false, 0);
Results.push_back(Tmp1);
case ISD::VACOPY:
Results.push_back(DAG.expandVACopy(Node));
break;
}
case ISD::EXTRACT_VECTOR_ELT:
if (Node->getOperand(0).getValueType().getVectorNumElements() == 1)
// This must be an access of the only element. Return it.

View File

@ -1855,6 +1855,57 @@ SDValue SelectionDAG::getShiftAmountOperand(EVT LHSTy, SDValue Op) {
return getZExtOrTrunc(Op, SDLoc(Op), ShTy);
}
SDValue SelectionDAG::expandVAArg(SDNode *Node) {
SDLoc dl(Node);
const TargetLowering &TLI = getTargetLoweringInfo();
const Value *V = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
EVT VT = Node->getValueType(0);
SDValue Tmp1 = Node->getOperand(0);
SDValue Tmp2 = Node->getOperand(1);
unsigned Align = Node->getConstantOperandVal(3);
SDValue VAListLoad =
getLoad(TLI.getPointerTy(getDataLayout()), dl, Tmp1, Tmp2,
MachinePointerInfo(V), false, false, false, 0);
SDValue VAList = VAListLoad;
if (Align > TLI.getMinStackArgumentAlignment()) {
assert(((Align & (Align-1)) == 0) && "Expected Align to be a power of 2");
VAList = getNode(ISD::ADD, dl, VAList.getValueType(), VAList,
getConstant(Align - 1, dl, VAList.getValueType()));
VAList = getNode(ISD::AND, dl, VAList.getValueType(), VAList,
getConstant(-(int64_t)Align, dl, VAList.getValueType()));
}
// Increment the pointer, VAList, to the next vaarg
Tmp1 = getNode(ISD::ADD, dl, VAList.getValueType(), VAList,
getConstant(getDataLayout().getTypeAllocSize(
VT.getTypeForEVT(*getContext())),
dl, VAList.getValueType()));
// Store the incremented VAList to the legalized pointer
Tmp1 = getStore(VAListLoad.getValue(1), dl, Tmp1, Tmp2,
MachinePointerInfo(V), false, false, 0);
// Load the actual argument out of the pointer VAList
return getLoad(VT, dl, Tmp1, VAList, MachinePointerInfo(),
false, false, false, 0);
}
SDValue SelectionDAG::expandVACopy(SDNode *Node) {
SDLoc dl(Node);
const TargetLowering &TLI = getTargetLoweringInfo();
// This defaults to loading a pointer from the input and storing it to the
// output, returning the chain.
const Value *VD = cast<SrcValueSDNode>(Node->getOperand(3))->getValue();
const Value *VS = cast<SrcValueSDNode>(Node->getOperand(4))->getValue();
SDValue Tmp1 = getLoad(TLI.getPointerTy(getDataLayout()), dl,
Node->getOperand(0), Node->getOperand(2),
MachinePointerInfo(VS), false, false, false, 0);
return getStore(Tmp1.getValue(1), dl, Tmp1, Node->getOperand(1),
MachinePointerInfo(VD), false, false, 0);
}
/// CreateStackTemporary - Create a stack temporary, suitable for holding the
/// specified value type.
SDValue SelectionDAG::CreateStackTemporary(EVT VT, unsigned minAlign) {

View File

@ -484,8 +484,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// VASTART needs to be custom lowered to use the VarArgsFrameIndex
setOperationAction(ISD::VASTART , MVT::Other, Custom);
setOperationAction(ISD::VAEND , MVT::Other, Expand);
if (Subtarget->is64Bit() && !Subtarget->isTargetWin64()) {
// TargetInfo::X86_64ABIBuiltinVaList
if (Subtarget->is64Bit()) {
setOperationAction(ISD::VAARG , MVT::Other, Custom);
setOperationAction(ISD::VACOPY , MVT::Other, Custom);
} else {
@ -15153,7 +15152,8 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
SDLoc DL(Op);
if (!Subtarget->is64Bit() || Subtarget->isTargetWin64()) {
if (!Subtarget->is64Bit() ||
Subtarget->isCallingConvWin64(MF.getFunction()->getCallingConv())) {
// vastart just stores the address of the VarArgsFrameIndex slot into the
// memory location argument.
SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
@ -15203,10 +15203,13 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
assert(Subtarget->is64Bit() &&
"LowerVAARG only handles 64-bit va_arg!");
assert((Subtarget->isTargetLinux() ||
Subtarget->isTargetDarwin()) &&
"Unhandled target in LowerVAARG");
assert(Op.getNode()->getNumOperands() == 4);
MachineFunction &MF = DAG.getMachineFunction();
if (Subtarget->isCallingConvWin64(MF.getFunction()->getCallingConv()))
// The Win64 ABI uses char* instead of a structure.
return DAG.expandVAArg(Op.getNode());
SDValue Chain = Op.getOperand(0);
SDValue SrcPtr = Op.getOperand(1);
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
@ -15234,8 +15237,7 @@ SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
if (ArgMode == 2) {
// Sanity Check: Make sure using fp_offset makes sense.
assert(!Subtarget->useSoftFloat() &&
!(DAG.getMachineFunction().getFunction()->hasFnAttribute(
Attribute::NoImplicitFloat)) &&
!(MF.getFunction()->hasFnAttribute(Attribute::NoImplicitFloat)) &&
Subtarget->hasSSE1());
}
@ -15264,8 +15266,14 @@ SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
static SDValue LowerVACOPY(SDValue Op, const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
// X86-64 va_list is a struct { i32, i32, i8*, i8* }.
// X86-64 va_list is a struct { i32, i32, i8*, i8* }, except on Windows,
// where a va_list is still an i8*.
assert(Subtarget->is64Bit() && "This code only handles 64-bit va_copy!");
if (Subtarget->isCallingConvWin64(
DAG.getMachineFunction().getFunction()->getCallingConv()))
// Probably a Win64 va_copy.
return DAG.expandVACopy(Op.getNode());
SDValue Chain = Op.getOperand(0);
SDValue DstPtr = Op.getOperand(1);
SDValue SrcPtr = Op.getOperand(2);
@ -20034,7 +20042,7 @@ X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter(
int64_t RegSaveFrameIndex = MI->getOperand(1).getImm();
int64_t VarArgsFPOffset = MI->getOperand(2).getImm();
if (!Subtarget->isTargetWin64()) {
if (!Subtarget->isCallingConvWin64(F->getFunction()->getCallingConv())) {
// If %al is 0, branch around the XMM save block.
BuildMI(MBB, DL, TII->get(X86::TEST8rr)).addReg(CountReg).addReg(CountReg);
BuildMI(MBB, DL, TII->get(X86::JE_1)).addMBB(EndMBB);

View File

@ -2863,6 +2863,8 @@ struct VarArgAMD64Helper : public VarArgHelper {
}
void visitVAStartInst(VAStartInst &I) override {
if (F.getCallingConv() == CallingConv::X86_64_Win64)
return;
IRBuilder<> IRB(&I);
VAStartInstrumentationList.push_back(&I);
Value *VAListTag = I.getArgOperand(0);
@ -2875,6 +2877,8 @@ struct VarArgAMD64Helper : public VarArgHelper {
}
void visitVACopyInst(VACopyInst &I) override {
if (F.getCallingConv() == CallingConv::X86_64_Win64)
return;
IRBuilder<> IRB(&I);
Value *VAListTag = I.getArgOperand(0);
Value *ShadowPtr = MSV.getShadowPtr(VAListTag, IRB.getInt8Ty(), IRB);

View File

@ -0,0 +1,108 @@
; RUN: llc < %s -mcpu=generic -mtriple=x86_64-pc-linux-gnu | FileCheck %s
; Verify that the var arg parameters which are passed in registers are stored
; in home stack slots allocated by the caller and that AP is correctly
; calculated.
define x86_64_win64cc void @average_va(i32 %count, ...) nounwind {
entry:
; CHECK: pushq
; CHECK: movq %r9, 40(%rsp)
; CHECK: movq %r8, 32(%rsp)
; CHECK: movq %rdx, 24(%rsp)
; CHECK: leaq 24(%rsp), %rax
%ap = alloca i8*, align 8 ; <i8**> [#uses=1]
%ap.0 = bitcast i8** %ap to i8*
call void @llvm.va_start(i8* %ap.0)
ret void
}
declare void @llvm.va_start(i8*) nounwind
declare void @llvm.va_copy(i8*, i8*) nounwind
declare void @llvm.va_end(i8*) nounwind
; CHECK-LABEL: f5:
; CHECK: pushq
; CHECK: leaq 56(%rsp),
define x86_64_win64cc i8** @f5(i64 %a0, i64 %a1, i64 %a2, i64 %a3, i64 %a4, ...) nounwind {
entry:
%ap = alloca i8*, align 8
%ap.0 = bitcast i8** %ap to i8*
call void @llvm.va_start(i8* %ap.0)
ret i8** %ap
}
; CHECK-LABEL: f4:
; CHECK: pushq
; CHECK: leaq 48(%rsp),
define x86_64_win64cc i8** @f4(i64 %a0, i64 %a1, i64 %a2, i64 %a3, ...) nounwind {
entry:
%ap = alloca i8*, align 8
%ap.0 = bitcast i8** %ap to i8*
call void @llvm.va_start(i8* %ap.0)
ret i8** %ap
}
; CHECK-LABEL: f3:
; CHECK: pushq
; CHECK: leaq 40(%rsp),
define x86_64_win64cc i8** @f3(i64 %a0, i64 %a1, i64 %a2, ...) nounwind {
entry:
%ap = alloca i8*, align 8
%ap.0 = bitcast i8** %ap to i8*
call void @llvm.va_start(i8* %ap.0)
ret i8** %ap
}
; WinX86_64 uses char* for va_list. Verify that the correct amount of bytes
; are copied using va_copy.
; CHECK-LABEL: copy1:
; CHECK: leaq 32(%rsp), [[REG_copy1:%[a-z]+]]
; CHECK: movq [[REG_copy1]], 8(%rsp)
; CHECK: movq [[REG_copy1]], (%rsp)
; CHECK: ret
define x86_64_win64cc void @copy1(i64 %a0, ...) nounwind {
entry:
%ap = alloca i8*, align 8
%cp = alloca i8*, align 8
%ap.0 = bitcast i8** %ap to i8*
%cp.0 = bitcast i8** %cp to i8*
call void @llvm.va_start(i8* %ap.0)
call void @llvm.va_copy(i8* %cp.0, i8* %ap.0)
ret void
}
; CHECK-LABEL: copy4:
; CHECK: leaq 56(%rsp), [[REG_copy4:%[a-z]+]]
; CHECK: movq [[REG_copy4]], 8(%rsp)
; CHECK: movq [[REG_copy4]], (%rsp)
; CHECK: ret
define x86_64_win64cc void @copy4(i64 %a0, i64 %a1, i64 %a2, i64 %a3, ...) nounwind {
entry:
%ap = alloca i8*, align 8
%cp = alloca i8*, align 8
%ap.0 = bitcast i8** %ap to i8*
%cp.0 = bitcast i8** %cp to i8*
call void @llvm.va_start(i8* %ap.0)
call void @llvm.va_copy(i8* %cp.0, i8* %ap.0)
ret void
}
; CHECK-LABEL: arg4:
; va_start:
; CHECK: leaq 48(%rsp), [[REG_arg4_1:%[a-z]+]]
; CHECK: movq [[REG_arg4_1]], (%rsp)
; va_arg:
; CHECK: leaq 52(%rsp), [[REG_arg4_2:%[a-z]+]]
; CHECK: movq [[REG_arg4_2]], (%rsp)
; CHECK: movl 48(%rsp), %eax
; CHECK: ret
define x86_64_win64cc i32 @arg4(i64 %a0, i64 %a1, i64 %a2, i64 %a3, ...) nounwind {
entry:
%ap = alloca i8*, align 8
%ap.0 = bitcast i8** %ap to i8*
call void @llvm.va_start(i8* %ap.0)
%tmp = va_arg i8** %ap, i32
ret i32 %tmp
}